Skip to content

API Reference

QuartumSE - Quantum Measurement Optimization & Observability Platform

A vendor-neutral framework for running quantum experiments with: - Classical shadows for shot-efficient observable estimation - Rigorous error mitigation and confidence intervals - Full provenance tracking and reproducibility - Cross-platform backend support (IBM, AWS, and more) - Publication-grade benchmarking per Measurements Bible

License: Apache 2.0

BenchmarkMode

Bases: Enum

Benchmark execution modes.

Source code in src/quartumse/benchmark_suite.py
23
24
25
26
27
28
class BenchmarkMode(Enum):
    """Supported execution modes for the benchmark suite."""

    # Core benchmark plus Tasks 1, 3, and 6.
    BASIC = "basic"
    # All 8 tasks.
    COMPLETE = "complete"
    # All 8 tasks plus the enhanced analysis pass.
    ANALYSIS = "analysis"

BenchmarkSuiteConfig dataclass

Configuration for benchmark suite execution.

Attributes:

Name Type Description
mode BenchmarkMode

Execution mode (basic, complete, analysis)

n_shots_grid list[int]

Shot budgets to evaluate

n_replicates int

Number of replicates per configuration

seed int

Base random seed

epsilon float

Target precision for tasks

delta float

Failure probability

compute_truth bool

Whether to compute ground truth

shadows_protocol_id str

Protocol ID for shadows (for comparison)

baseline_protocol_id str

Protocol ID for baseline (for comparison)

output_base_dir str

Base directory for outputs (timestamped subdir created)

noise_profile str | None

Noise profile ID (e.g., "readout_1e-2", "depol_medium"). If None or "ideal", runs noiseless simulation.

timeout_per_protocol_s float | None

Per-protocol timeout in seconds (None disables the timeout)

hw_timing_profile Any | None

HardwareTimingProfile for hardware time estimates

Source code in src/quartumse/benchmark_suite.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
@dataclass
class BenchmarkSuiteConfig:
    """Configuration for benchmark suite execution.

    Attributes:
        mode: Execution mode (basic, complete, analysis)
        n_shots_grid: Shot budgets to evaluate
        n_replicates: Number of replicates per configuration
        seed: Base random seed
        epsilon: Target precision for tasks
        delta: Failure probability
        compute_truth: Whether to compute ground truth
        shadows_protocol_id: Protocol ID for shadows (for comparison)
        baseline_protocol_id: Protocol ID for baseline (for comparison)
        output_base_dir: Base directory for outputs (timestamped subdir created)
        noise_profile: Noise profile ID (e.g., "readout_1e-2", "depol_medium").
            If None or "ideal", runs noiseless simulation.
        timeout_per_protocol_s: Per-protocol timeout in seconds (None disables
            the timeout).
        hw_timing_profile: HardwareTimingProfile for hardware time estimates.
    """

    mode: BenchmarkMode = BenchmarkMode.COMPLETE
    n_shots_grid: list[int] = field(default_factory=lambda: [100, 500, 1000, 5000])
    n_replicates: int = 20
    seed: int = 42
    epsilon: float = 0.01
    delta: float = 0.05
    compute_truth: bool = True
    shadows_protocol_id: str = "classical_shadows_v0"
    baseline_protocol_id: str = "direct_grouped"
    output_base_dir: str = "benchmark_results"
    noise_profile: str | None = None  # Noise profile ID (e.g., "readout_1e-2", "depol_medium")
    timeout_per_protocol_s: float | None = None  # Per-protocol timeout in seconds
    hw_timing_profile: Any | None = None  # HardwareTimingProfile for hw time estimates

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable view of the configuration.

        Note: ``hw_timing_profile`` is not included; it is an arbitrary
        object and may not be JSON-serializable.
        """
        return {
            "mode": self.mode.value,
            "n_shots_grid": self.n_shots_grid,
            "n_replicates": self.n_replicates,
            "seed": self.seed,
            "epsilon": self.epsilon,
            "delta": self.delta,
            "compute_truth": self.compute_truth,
            "shadows_protocol_id": self.shadows_protocol_id,
            "baseline_protocol_id": self.baseline_protocol_id,
            "output_base_dir": self.output_base_dir,
            "noise_profile": self.noise_profile,
            "timeout_per_protocol_s": self.timeout_per_protocol_s,
        }

BenchmarkSuiteResult dataclass

Complete result from benchmark suite execution.

Attributes:

Name Type Description
run_id str

Unique run identifier

timestamp datetime

Execution timestamp

mode BenchmarkMode

Benchmark mode used

output_dir Path

Directory containing all outputs

ground_truth Any | None

Ground truth result (if computed)

long_form_results list

List of LongFormRow

task_results dict[str, Any]

Dict of task outputs (basic tasks)

all_task_results dict[str, Any] | None

Dict of all 8 task outputs (complete mode)

analysis Any | None

ComprehensiveBenchmarkAnalysis (analysis mode)

reports dict[str, Path]

Dict of report paths

summary dict[str, Any]

Summary statistics

Source code in src/quartumse/benchmark_suite.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
@dataclass
class BenchmarkSuiteResult:
    """Complete result produced by one benchmark suite run.

    Attributes:
        run_id: Unique run identifier
        timestamp: Execution timestamp
        mode: Benchmark mode used
        output_dir: Directory containing all outputs
        ground_truth: Ground truth result (if computed)
        long_form_results: List of LongFormRow
        task_results: Dict of task outputs (basic tasks)
        all_task_results: Dict of all 8 task outputs (complete mode)
        analysis: ComprehensiveBenchmarkAnalysis (analysis mode)
        reports: Dict of report paths
        summary: Summary statistics
    """

    run_id: str
    timestamp: datetime
    mode: BenchmarkMode
    output_dir: Path
    ground_truth: Any | None
    long_form_results: list
    task_results: dict[str, Any]
    all_task_results: dict[str, Any] | None
    analysis: Any | None  # ComprehensiveBenchmarkAnalysis
    reports: dict[str, Path]
    summary: dict[str, Any]

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable summary of this result."""
        # all_task_results is optional; fall back to an empty list of task ids.
        completed_all = list(self.all_task_results.keys()) if self.all_task_results else []
        report_paths = {name: str(location) for name, location in self.reports.items()}
        return {
            "run_id": self.run_id,
            "timestamp": self.timestamp.isoformat(),
            "mode": self.mode.value,
            "output_dir": str(self.output_dir),
            "n_long_form_rows": len(self.long_form_results),
            "tasks_completed": list(self.task_results.keys()),
            "all_tasks_completed": completed_all,
            "has_analysis": self.analysis is not None,
            "reports": report_paths,
            "summary": self.summary,
        }

ClassicalShadows

Bases: ABC

Abstract base class for classical shadows implementations.

Different versions (v0-v4) subclass this to provide specific algorithms.

Source code in src/quartumse/shadows/core.py
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
class ClassicalShadows(ABC):
    """
    Abstract base class for classical shadows implementations.

    Concrete versions (v0-v4) subclass this and supply the specific
    algorithms for circuit generation, reconstruction, and estimation.
    """

    def __init__(self, config: Any):
        self.config = config
        # Populated by concrete subclasses as measurements are processed.
        self.shadow_data: np.ndarray | None = None
        self.measurement_bases: np.ndarray | None = None
        self.measurement_outcomes: np.ndarray | None = None

    @abstractmethod
    def generate_measurement_circuits(
        self, base_circuit: QuantumCircuit, num_shadows: int
    ) -> list[QuantumCircuit]:
        """
        Build the randomized measurement circuits for the shadows protocol.

        Args:
            base_circuit: The state preparation circuit
            num_shadows: Number of random measurements

        Returns:
            List of circuits with randomized measurements appended
        """

    @abstractmethod
    def reconstruct_classical_shadow(
        self, measurement_outcomes: np.ndarray, measurement_bases: np.ndarray
    ) -> np.ndarray:
        """
        Turn raw measurement data into classical shadow snapshots.

        Args:
            measurement_outcomes: Binary outcomes (0/1) for each measurement
            measurement_bases: Which basis was measured for each qubit

        Returns:
            Array of shadow snapshots (density matrix representations)
        """

    @abstractmethod
    def estimate_observable(
        self, observable: Observable, shadow_data: np.ndarray | None = None
    ) -> ShadowEstimate:
        """
        Estimate an observable's expectation value from shadow data.

        Args:
            observable: The observable to estimate
            shadow_data: Pre-computed shadow snapshots (or use self.shadow_data)

        Returns:
            Estimate with confidence interval
        """

    @abstractmethod
    def estimate_shadow_size_needed(self, observable: Observable, target_precision: float) -> int:
        """Estimate the number of shadows required for a desired precision."""

        raise NotImplementedError

    def estimate_multiple_observables(
        self, observables: list[Observable]
    ) -> dict[str, ShadowEstimate]:
        """
        Estimate several observables from one shared shadow dataset.

        This is the key advantage: one shadow dataset, many observables.

        Raises:
            ValueError: If no shadow data has been collected yet.
        """
        if self.shadow_data is None:
            raise ValueError("No shadow data available. Run generate_measurement_circuits first.")

        return {str(obs): self.estimate_observable(obs) for obs in observables}

    def compute_variance_bound(self, observable: Observable, shadow_size: int) -> float:
        """
        Theoretical variance bound for the shadow estimator.

        Useful for shot allocation and adaptive strategies.
        """
        # Default implementation (subclasses can override).
        # Random local Clifford bound: Var ≤ 4^k / M, where k = support size.
        support_size = sum(pauli != "I" for pauli in observable.pauli_string)
        return float(4**support_size) / float(shadow_size)

    def compute_confidence_interval(
        self, mean: float, variance: float, n_samples: int, confidence: float = 0.95
    ) -> tuple[float, float]:
        """Compute confidence interval using normal approximation."""
        # scipy is imported lazily to keep module import cheap.
        from scipy import stats

        z_score = float(stats.norm.ppf((1 + confidence) / 2))
        std_error = np.sqrt(variance / n_samples)
        half_width = z_score * std_error

        return (mean - half_width, mean + half_width)

compute_confidence_interval(mean, variance, n_samples, confidence=0.95)

Compute confidence interval using normal approximation.

Source code in src/quartumse/shadows/core.py
150
151
152
153
154
155
156
157
158
159
160
161
162
def compute_confidence_interval(
    self, mean: float, variance: float, n_samples: int, confidence: float = 0.95
) -> tuple[float, float]:
    """Compute confidence interval using normal approximation.

    Args:
        mean: Point estimate around which the interval is centered.
        variance: Variance of a single sample.
        n_samples: Number of samples behind the estimate.
        confidence: Two-sided confidence level (default 0.95).

    Returns:
        Tuple ``(ci_lower, ci_upper)``.
    """
    # scipy imported lazily so the module can load without it.
    from scipy import stats

    std_error = np.sqrt(variance / n_samples)
    # Two-sided z critical value for the requested confidence level.
    z_score = float(stats.norm.ppf((1 + confidence) / 2))

    ci_lower = mean - z_score * std_error
    ci_upper = mean + z_score * std_error

    return (ci_lower, ci_upper)

compute_variance_bound(observable, shadow_size)

Theoretical variance bound for the shadow estimator.

Useful for shot allocation and adaptive strategies.

Source code in src/quartumse/shadows/core.py
139
140
141
142
143
144
145
146
147
148
def compute_variance_bound(self, observable: Observable, shadow_size: int) -> float:
    """
    Theoretical variance bound for the shadow estimator.

    Useful for shot allocation and adaptive strategies.

    Args:
        observable: Pauli observable whose estimator variance is bounded.
        shadow_size: Number of shadow snapshots (M in the bound below).

    Returns:
        Upper bound on the variance of the shadow estimator.
    """
    # Default implementation (subclasses can override)
    # For random local Clifford: Var ≤ 4^k / M, where k = support size
    support_size = sum(1 for p in observable.pauli_string if p != "I")
    return float(4**support_size) / float(shadow_size)

estimate_multiple_observables(observables)

Estimate multiple observables from the same shadow data.

This is the key advantage: one shadow dataset, many observables.

Source code in src/quartumse/shadows/core.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def estimate_multiple_observables(
    self, observables: list[Observable]
) -> dict[str, ShadowEstimate]:
    """
    Estimate multiple observables from the same shadow data.

    This is the key advantage: one shadow dataset, many observables.

    Args:
        observables: Observables to estimate from the stored shadow data.

    Returns:
        Mapping from ``str(observable)`` to its ShadowEstimate.

    Raises:
        ValueError: If no shadow data has been collected yet.
    """
    if self.shadow_data is None:
        raise ValueError("No shadow data available. Run generate_measurement_circuits first.")

    results = {}
    for obs in observables:
        estimate = self.estimate_observable(obs)
        results[str(obs)] = estimate

    return results

estimate_observable(observable, shadow_data=None) abstractmethod

Estimate expectation value of an observable using shadow data.

Parameters:

Name Type Description Default
observable Observable

The observable to estimate

required
shadow_data ndarray | None

Pre-computed shadow snapshots (or use self.shadow_data)

None

Returns:

Type Description
ShadowEstimate

Estimate with confidence interval

Source code in src/quartumse/shadows/core.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
@abstractmethod
def estimate_observable(
    self, observable: Observable, shadow_data: np.ndarray | None = None
) -> ShadowEstimate:
    """
    Estimate expectation value of an observable using shadow data.

    Abstract: each concrete shadows version supplies its own estimator.

    Args:
        observable: The observable to estimate
        shadow_data: Pre-computed shadow snapshots (or use self.shadow_data)

    Returns:
        Estimate with confidence interval
    """
    pass

estimate_shadow_size_needed(observable, target_precision) abstractmethod

Estimate the number of shadows required for a desired precision.

Source code in src/quartumse/shadows/core.py
115
116
117
118
119
@abstractmethod
def estimate_shadow_size_needed(self, observable: Observable, target_precision: float) -> int:
    """Estimate the number of shadows required for a desired precision.

    Args:
        observable: Observable whose estimation cost is being sized.
        target_precision: Desired precision of the estimate.

    Returns:
        Number of shadow snapshots required.
    """

    raise NotImplementedError

generate_measurement_circuits(base_circuit, num_shadows) abstractmethod

Generate randomized measurement circuits for shadows protocol.

Parameters:

Name Type Description Default
base_circuit QuantumCircuit

The state preparation circuit

required
num_shadows int

Number of random measurements

required

Returns:

Type Description
list[QuantumCircuit]

List of circuits with randomized measurements appended

Source code in src/quartumse/shadows/core.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
@abstractmethod
def generate_measurement_circuits(
    self, base_circuit: QuantumCircuit, num_shadows: int
) -> list[QuantumCircuit]:
    """
    Generate randomized measurement circuits for shadows protocol.

    Abstract: each concrete shadows version chooses its own measurement
    randomization scheme.

    Args:
        base_circuit: The state preparation circuit
        num_shadows: Number of random measurements

    Returns:
        List of circuits with randomized measurements appended
    """
    pass

reconstruct_classical_shadow(measurement_outcomes, measurement_bases) abstractmethod

Reconstruct classical shadow snapshots from measurement data.

Parameters:

Name Type Description Default
measurement_outcomes ndarray

Binary outcomes (0/1) for each measurement

required
measurement_bases ndarray

Which basis was measured for each qubit

required

Returns:

Type Description
ndarray

Array of shadow snapshots (density matrix representations)

Source code in src/quartumse/shadows/core.py
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
@abstractmethod
def reconstruct_classical_shadow(
    self, measurement_outcomes: np.ndarray, measurement_bases: np.ndarray
) -> np.ndarray:
    """
    Reconstruct classical shadow snapshots from measurement data.

    Abstract: the inversion applied here depends on the measurement
    ensemble chosen by the concrete shadows version.

    Args:
        measurement_outcomes: Binary outcomes (0/1) for each measurement
        measurement_bases: Which basis was measured for each qubit

    Returns:
        Array of shadow snapshots (density matrix representations)
    """
    pass

ComprehensiveBenchmarkAnalysis dataclass

Complete benchmark analysis with all improvements.

Attributes:

Name Type Description
run_id str

Benchmark run identifier

protocols list[str]

List of protocol IDs analyzed

n_observables int

Number of observables

n_shots_grid list[int]

Shot budgets evaluated

task_analyses dict[str, TaskAnalysis]

Per-task analysis results

crossover_analysis CrossoverAnalysis | None

Per-observable crossover analysis

locality_analysis dict[str, Any]

Performance by observable locality

statistical_comparison dict[int, StatisticalComparison]

Statistical significance tests

cost_analysis dict[str, Any]

Cost-normalized comparisons

pilot_analysis MultiPilotAnalysis | None

Multi-pilot fraction analysis

pilot_analysis_interpolated MultiPilotAnalysis | None

Multi-pilot fraction analysis (interpolated variant)

interpolated_n_star dict[str, dict[str, Any]]

N* estimates via power-law interpolation

summary dict[str, Any]

Executive summary

Source code in src/quartumse/analysis/comprehensive.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
@dataclass
class ComprehensiveBenchmarkAnalysis:
    """Complete benchmark analysis with all improvements.

    Attributes:
        run_id: Benchmark run identifier
        protocols: List of protocol IDs analyzed
        n_observables: Number of observables
        n_shots_grid: Shot budgets evaluated
        task_analyses: Per-task analysis results
        crossover_analysis: Per-observable crossover analysis
        locality_analysis: Performance by observable locality
        statistical_comparison: Statistical significance tests
        cost_analysis: Cost-normalized comparisons
        pilot_analysis: Multi-pilot fraction analysis
        pilot_analysis_interpolated: Multi-pilot fraction analysis
            (interpolated variant)
        interpolated_n_star: N* estimates via power-law interpolation
        summary: Executive summary
    """

    run_id: str
    protocols: list[str]
    n_observables: int
    n_shots_grid: list[int]
    task_analyses: dict[str, TaskAnalysis]
    crossover_analysis: CrossoverAnalysis | None
    locality_analysis: dict[str, Any]
    statistical_comparison: dict[int, StatisticalComparison]
    cost_analysis: dict[str, Any]
    pilot_analysis: MultiPilotAnalysis | None
    pilot_analysis_interpolated: MultiPilotAnalysis | None
    interpolated_n_star: dict[str, dict[str, Any]]
    summary: dict[str, Any]

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict view of the full analysis."""
        return {
            "run_id": self.run_id,
            "protocols": self.protocols,
            "n_observables": self.n_observables,
            "n_shots_grid": self.n_shots_grid,
            "task_analyses": {
                k: {
                    "task_id": v.task_id,
                    "task_type": v.task_type,
                    "base_results": v.base_results,
                    "enhanced_results": v.enhanced_results,
                }
                for k, v in self.task_analyses.items()
            },
            "crossover_analysis": (
                self.crossover_analysis.summary if self.crossover_analysis else None
            ),
            "locality_analysis": self.locality_analysis,
            "statistical_comparison": {
                n: c.to_dict() for n, c in self.statistical_comparison.items()
            },
            "cost_analysis": self.cost_analysis,
            "pilot_analysis": self.pilot_analysis.to_dict() if self.pilot_analysis else None,
            "pilot_analysis_interpolated": (
                self.pilot_analysis_interpolated.to_dict()
                if self.pilot_analysis_interpolated
                else None
            ),
            "interpolated_n_star": self.interpolated_n_star,
            "summary": self.summary,
        }

    def save(self, path: str | Path) -> None:
        """Save analysis to JSON file."""
        path = Path(path)
        with open(path, "w") as f:
            # default=str stringifies any value json cannot serialize natively.
            json.dump(self.to_dict(), f, indent=2, default=str)

    def generate_report(self) -> str:
        """Generate markdown report."""
        lines = [
            "# Comprehensive Benchmark Analysis",
            "",
            f"**Run ID:** {self.run_id}",
            f"**Protocols:** {', '.join(self.protocols)}",
            f"**Observables:** {self.n_observables}",
            f"**Shot Grid:** {self.n_shots_grid}",
            "",
            "---",
            "",
        ]

        # Executive Summary
        lines.extend(
            [
                "## Executive Summary",
                "",
            ]
        )
        for key, value in self.summary.items():
            lines.append(f"- **{key}:** {value}")
        lines.append("")

        # Task Results
        lines.extend(
            [
                "---",
                "",
                "## Task Results",
                "",
            ]
        )

        for _task_id, analysis in self.task_analyses.items():
            lines.append(f"### {analysis.task_type.replace('_', ' ').title()}")
            lines.append("")
            for key, value in analysis.base_results.items():
                if not isinstance(value, (dict, list)):
                    lines.append(f"- {key}: {value}")
            if analysis.enhanced_results:
                lines.append("")
                lines.append("**Enhanced Analysis:**")
                for key, value in analysis.enhanced_results.items():
                    if not isinstance(value, (dict, list)):
                        lines.append(f"- {key}: {value}")
            lines.append("")

        # Statistical Significance
        if self.statistical_comparison:
            lines.extend(
                [
                    "---",
                    "",
                    "## Statistical Significance",
                    "",
                    "| N | Diff. P-value | K-S P-value | Reject Null | SSF (95% CI) |",
                    "|---|---------------|-------------|-------------|--------------|",
                ]
            )
            for n, comp in sorted(self.statistical_comparison.items()):
                ssf_ci = comp.ssf_ci
                ssf_str = (
                    f"{ssf_ci.estimate:.2f} [{ssf_ci.ci_low:.2f}, {ssf_ci.ci_high:.2f}]"
                    if ssf_ci
                    else "N/A"
                )
                lines.append(
                    f"| {n} | {comp.difference_test.p_value:.4f} | "
                    f"{comp.ks_test.p_value:.4f} | "
                    f"{'Yes' if comp.difference_test.reject_null else 'No'} | "
                    f"{ssf_str} |"
                )
            lines.append("")

        # Locality Analysis
        if self.locality_analysis:
            lines.extend(
                [
                    "---",
                    "",
                    "## Performance by Locality",
                    "",
                ]
            )
            for protocol_id, analysis in self.locality_analysis.items():
                if hasattr(analysis, "by_locality"):
                    lines.append(f"### {protocol_id}")
                    lines.append("")
                    lines.append(f"- Locality-SE Correlation: {analysis.locality_correlation:.3f}")
                    lines.append("")

        # Crossover Analysis
        if self.crossover_analysis:
            summary = self.crossover_analysis.summary
            lines.extend(
                [
                    "---",
                    "",
                    "## Per-Observable Crossover Analysis",
                    "",
                    f"- Protocol A ({self.crossover_analysis.protocol_a}) wins on {summary['a_win_fraction']*100:.1f}% of observables",
                    f"- Protocol B ({self.crossover_analysis.protocol_b}) wins on {summary['b_win_fraction']*100:.1f}% of observables",
                    f"- Crossover exists for {summary['crossover_fraction']*100:.1f}% of observables",
                    "",
                ]
            )

        # Pilot Analysis
        if self.pilot_analysis:
            lines.extend(
                [
                    "---",
                    "",
                    "## Multi-Pilot Fraction Analysis",
                    "",
                    "| Pilot % | Accuracy | Mean Regret |",
                    "|---------|----------|-------------|",
                ]
            )
            for frac, result in sorted(self.pilot_analysis.results.items()):
                lines.append(
                    f"| {frac*100:.0f}% | {result.selection_accuracy*100:.1f}% | {result.mean_regret:.4f} |"
                )
            lines.append("")
            if self.pilot_analysis.optimal_fraction:
                lines.append(
                    f"**Optimal pilot fraction:** {self.pilot_analysis.optimal_fraction*100:.0f}%"
                )
            lines.append("")

        # Interpolated N*
        if self.interpolated_n_star:
            lines.extend(
                [
                    "---",
                    "",
                    "## Interpolated N* (Power-Law)",
                    "",
                ]
            )
            for protocol_id, data in self.interpolated_n_star.items():
                n_star = data.get("n_star_interpolated")
                r_sq = data.get("r_squared", 0)
                lines.append(
                    f"- **{protocol_id}:** N* = {n_star:.0f} (R² = {r_sq:.3f})"
                    if n_star
                    else f"- **{protocol_id}:** N* not reached"
                )
            lines.append("")

        return "\n".join(lines)

generate_report()

Generate markdown report.

Source code in src/quartumse/analysis/comprehensive.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
def generate_report(self) -> str:
    """Generate markdown report.

    Builds an executive summary and per-task results, then appends the
    statistical-significance, locality, crossover, pilot-fraction, and
    interpolated-N* sections for whichever of those analyses are present.

    Returns:
        The complete report as a single markdown string.
    """
    lines = [
        "# Comprehensive Benchmark Analysis",
        "",
        f"**Run ID:** {self.run_id}",
        f"**Protocols:** {', '.join(self.protocols)}",
        f"**Observables:** {self.n_observables}",
        f"**Shot Grid:** {self.n_shots_grid}",
        "",
        "---",
        "",
    ]

    # Executive Summary
    lines.extend(
        [
            "## Executive Summary",
            "",
        ]
    )
    for key, value in self.summary.items():
        lines.append(f"- **{key}:** {value}")
    lines.append("")

    # Task Results
    lines.extend(
        [
            "---",
            "",
            "## Task Results",
            "",
        ]
    )

    for _task_id, analysis in self.task_analyses.items():
        lines.append(f"### {analysis.task_type.replace('_', ' ').title()}")
        lines.append("")
        # Nested containers are skipped: only scalar values render as bullets.
        for key, value in analysis.base_results.items():
            if not isinstance(value, (dict, list)):
                lines.append(f"- {key}: {value}")
        if analysis.enhanced_results:
            lines.append("")
            lines.append("**Enhanced Analysis:**")
            for key, value in analysis.enhanced_results.items():
                if not isinstance(value, (dict, list)):
                    lines.append(f"- {key}: {value}")
        lines.append("")

    # Statistical Significance
    if self.statistical_comparison:
        lines.extend(
            [
                "---",
                "",
                "## Statistical Significance",
                "",
                "| N | Diff. P-value | K-S P-value | Reject Null | SSF (95% CI) |",
                "|---|---------------|-------------|-------------|--------------|",
            ]
        )
        # Rows sorted by shot budget N for a stable table.
        for n, comp in sorted(self.statistical_comparison.items()):
            ssf_ci = comp.ssf_ci
            ssf_str = (
                f"{ssf_ci.estimate:.2f} [{ssf_ci.ci_low:.2f}, {ssf_ci.ci_high:.2f}]"
                if ssf_ci
                else "N/A"
            )
            lines.append(
                f"| {n} | {comp.difference_test.p_value:.4f} | "
                f"{comp.ks_test.p_value:.4f} | "
                f"{'Yes' if comp.difference_test.reject_null else 'No'} | "
                f"{ssf_str} |"
            )
        lines.append("")

    # Locality Analysis
    if self.locality_analysis:
        lines.extend(
            [
                "---",
                "",
                "## Performance by Locality",
                "",
            ]
        )
        for protocol_id, analysis in self.locality_analysis.items():
            # Only entries that carry a per-locality breakdown are reported.
            if hasattr(analysis, "by_locality"):
                lines.append(f"### {protocol_id}")
                lines.append("")
                lines.append(f"- Locality-SE Correlation: {analysis.locality_correlation:.3f}")
                lines.append("")

    # Crossover Analysis
    if self.crossover_analysis:
        summary = self.crossover_analysis.summary
        lines.extend(
            [
                "---",
                "",
                "## Per-Observable Crossover Analysis",
                "",
                f"- Protocol A ({self.crossover_analysis.protocol_a}) wins on {summary['a_win_fraction']*100:.1f}% of observables",
                f"- Protocol B ({self.crossover_analysis.protocol_b}) wins on {summary['b_win_fraction']*100:.1f}% of observables",
                f"- Crossover exists for {summary['crossover_fraction']*100:.1f}% of observables",
                "",
            ]
        )

    # Pilot Analysis
    if self.pilot_analysis:
        lines.extend(
            [
                "---",
                "",
                "## Multi-Pilot Fraction Analysis",
                "",
                "| Pilot % | Accuracy | Mean Regret |",
                "|---------|----------|-------------|",
            ]
        )
        for frac, result in sorted(self.pilot_analysis.results.items()):
            lines.append(
                f"| {frac*100:.0f}% | {result.selection_accuracy*100:.1f}% | {result.mean_regret:.4f} |"
            )
        lines.append("")
        if self.pilot_analysis.optimal_fraction:
            lines.append(
                f"**Optimal pilot fraction:** {self.pilot_analysis.optimal_fraction*100:.0f}%"
            )
        lines.append("")

    # Interpolated N*
    if self.interpolated_n_star:
        lines.extend(
            [
                "---",
                "",
                "## Interpolated N* (Power-Law)",
                "",
            ]
        )
        for protocol_id, data in self.interpolated_n_star.items():
            n_star = data.get("n_star_interpolated")
            r_sq = data.get("r_squared", 0)
            lines.append(
                f"- **{protocol_id}:** N* = {n_star:.0f} (R² = {r_sq:.3f})"
                if n_star
                else f"- **{protocol_id}:** N* not reached"
            )
        lines.append("")

    return "\n".join(lines)

save(path)

Save analysis to JSON file.

Source code in src/quartumse/analysis/comprehensive.py
122
123
124
125
126
def save(self, path: str | Path) -> None:
    """Serialize this analysis to a JSON file at *path*.

    Non-JSON-native values are stringified via ``default=str``.
    """
    target = Path(path)
    with open(target, "w") as fh:
        json.dump(self.to_dict(), fh, indent=2, default=str)

CostModel dataclass

Model for computing effective cost of measurements.

Cost factors:

- shots: Base cost (number of circuit executions)
- depth_penalty: Multiplier per unit circuit depth
- gate_penalty: Multiplier per 2-qubit gate
- noise_factor: Additional noise-based multiplier

Effective cost = shots * (1 + depth_penalty * depth) * (1 + gate_penalty * gates) * noise_factor

Source code in src/quartumse/analysis/cost_normalized.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
@dataclass
class CostModel:
    """Model for computing effective cost of measurements.

    Cost factors:
    - shots: Base cost (number of circuit executions)
    - depth_penalty: Multiplier per unit circuit depth
    - gate_penalty: Multiplier per 2-qubit gate
    - noise_factor: Additional noise-based multiplier

    Effective cost = shots * (1 + depth_penalty * depth) * (1 + gate_penalty * gates) * noise_factor
    """

    depth_penalty: float = 0.01  # 1% per depth unit
    gate_penalty: float = 0.005  # 0.5% per 2-qubit gate
    noise_factor: float = 1.0
    include_classical_time: bool = False
    classical_time_weight: float = 0.001  # Weight for classical compute time

    def compute_cost(
        self,
        n_shots: int,
        circuit_depth: int = 0,
        twoq_gates: int = 0,
        classical_time_s: float = 0.0,
    ) -> float:
        """Compute effective cost for a measurement configuration.

        Args:
            n_shots: Number of shots
            circuit_depth: Circuit depth
            twoq_gates: Number of 2-qubit gates
            classical_time_s: Classical processing time in seconds

        Returns:
            Effective cost value
        """
        base_cost = n_shots
        depth_mult = 1 + self.depth_penalty * circuit_depth
        gate_mult = 1 + self.gate_penalty * twoq_gates

        cost = base_cost * depth_mult * gate_mult * self.noise_factor

        if self.include_classical_time:
            cost += self.classical_time_weight * classical_time_s

        return cost

    def to_dict(self) -> dict[str, float]:
        return {
            "depth_penalty": self.depth_penalty,
            "gate_penalty": self.gate_penalty,
            "noise_factor": self.noise_factor,
            "include_classical_time": self.include_classical_time,
            "classical_time_weight": self.classical_time_weight,
        }

compute_cost(n_shots, circuit_depth=0, twoq_gates=0, classical_time_s=0.0)

Compute effective cost for a measurement configuration.

Parameters:

Name Type Description Default
n_shots int

Number of shots

required
circuit_depth int

Circuit depth

0
twoq_gates int

Number of 2-qubit gates

0
classical_time_s float

Classical processing time in seconds

0.0

Returns:

Type Description
float

Effective cost value

Source code in src/quartumse/analysis/cost_normalized.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def compute_cost(
    self,
    n_shots: int,
    circuit_depth: int = 0,
    twoq_gates: int = 0,
    classical_time_s: float = 0.0,
) -> float:
    """Compute effective cost for a measurement configuration.

    Args:
        n_shots: Number of shots
        circuit_depth: Circuit depth
        twoq_gates: Number of 2-qubit gates
        classical_time_s: Classical processing time in seconds

    Returns:
        Effective cost value
    """
    # Shot count scaled by depth/gate penalty multipliers and the noise factor.
    effective = (
        n_shots
        * (1 + self.depth_penalty * circuit_depth)
        * (1 + self.gate_penalty * twoq_gates)
        * self.noise_factor
    )

    # Optionally fold in a weighted classical-processing term.
    if self.include_classical_time:
        effective += self.classical_time_weight * classical_time_s

    return effective

DirectGroupedProtocol

Bases: StaticProtocol

Direct measurement with commuting grouping (§4.1B).

Observables are partitioned into qubit-wise commuting groups. Each group is measured in a shared basis.

This is the REQUIRED baseline for defensible benchmarks.

Attributes:

Name Type Description
protocol_id str

"direct_grouped"

protocol_version str

"1.0.0"

grouping_method str

Method for partitioning ("greedy" or "sorted_insertion")

Source code in src/quartumse/protocols/baselines/direct_grouped.py
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
@register_protocol
class DirectGroupedProtocol(StaticProtocol):
    """Direct measurement with commuting grouping (§4.1B).

    Observables are partitioned into qubit-wise commuting groups.
    Each group is measured in a shared basis.

    This is the REQUIRED baseline for defensible benchmarks.

    Attributes:
        protocol_id: "direct_grouped"
        protocol_version: "1.0.0"
        grouping_method: Method for partitioning ("greedy" or "sorted_insertion")
    """

    protocol_id: str = "direct_grouped"
    protocol_version: str = "1.0.0"
    # Default partitioning strategy; overridden per-instance in __init__.
    grouping_method: str = "greedy"

    def __init__(self, grouping_method: str = "greedy") -> None:
        """Initialize protocol.

        Args:
            grouping_method: "greedy" or "sorted_insertion"
        """
        super().__init__()
        self.grouping_method = grouping_method

    def initialize(
        self,
        observable_set: ObservableSet,
        total_budget: int,
        seed: int,
    ) -> DirectGroupedState:
        """Initialize protocol state with commuting groups.

        Args:
            observable_set: Set of observables to estimate.
            total_budget: Total number of shots available.
            seed: Random seed for reproducibility.

        Returns:
            Initialized DirectGroupedState.
        """
        # Partition into commuting groups
        groups, stats = partition_observable_set(observable_set, method=self.grouping_method)

        G = len(groups)
        # Ensure at least 1 shot per group (if budget allows)
        # NOTE(review): with G > total_budget this over-allocates (G shots total);
        # confirm upstream budget checks prevent that.
        shots_per_group = max(1, total_budget // G) if G > 0 else 0

        # Initialize storage for each group
        group_bitstrings = {g.group_id: [] for g in groups}

        return DirectGroupedState(
            observable_set=observable_set,
            total_budget=total_budget,
            remaining_budget=total_budget,
            seed=seed,
            n_rounds=0,
            groups=groups,
            shots_per_group=shots_per_group,
            group_bitstrings=group_bitstrings,
            grouping_method=self.grouping_method,
            metadata={
                "protocol_id": self.protocol_id,
                "n_groups": G,
                "grouping_stats": stats,
            },
        )

    def plan(
        self,
        state: ProtocolState,
    ) -> MeasurementPlan:
        """Generate measurement plan with one setting per group.

        Args:
            state: Current protocol state.

        Returns:
            MeasurementPlan with G settings, one per commuting group.
        """
        grouped_state = state
        if not isinstance(grouped_state, DirectGroupedState):
            raise TypeError("Expected DirectGroupedState")

        settings = []
        shots_per_setting = []
        observable_setting_map: dict[str, list[int]] = {}

        for i, group in enumerate(grouped_state.groups):
            # One shared-basis setting covers every observable in the group;
            # the group_id doubles as the setting_id (update() relies on this).
            setting = MeasurementSetting(
                setting_id=group.group_id,
                measurement_basis=group.measurement_basis,
                target_qubits=list(range(grouped_state.observable_set.n_qubits)),
                metadata={
                    "group_size": group.size,
                    "observable_ids": [obs.observable_id for obs in group.observables],
                },
            )
            settings.append(setting)
            shots_per_setting.append(grouped_state.shots_per_group)

            # Map each observable in this group to this setting
            for obs in group.observables:
                observable_setting_map[obs.observable_id] = [i]

        return MeasurementPlan(
            settings=settings,
            shots_per_setting=shots_per_setting,
            observable_setting_map=observable_setting_map,
            metadata={
                "n_groups": len(grouped_state.groups),
                "grouping_method": grouped_state.grouping_method,
            },
        )

    def update(
        self,
        state: ProtocolState,
        data_chunk: RawDatasetChunk,
    ) -> ProtocolState:
        """Update state with new measurement data.

        Args:
            state: Current protocol state.
            data_chunk: New measurement data.

        Returns:
            Updated protocol state.
        """
        grouped_state = state
        if not isinstance(grouped_state, DirectGroupedState):
            raise TypeError("Expected DirectGroupedState")

        # Store bitstrings for each group
        # (setting_id is the group_id, as assigned in plan()).
        for setting_id, bitstrings in data_chunk.bitstrings.items():
            grouped_state.group_bitstrings[setting_id].extend(bitstrings)

        # Update budget tracking
        total_new_shots = sum(len(bs) for bs in data_chunk.bitstrings.values())
        grouped_state.remaining_budget -= total_new_shots
        # NOTE(review): initialize() sets ``n_rounds=0`` but this increments
        # ``round_number`` — confirm ProtocolState defines/aliases both fields.
        grouped_state.round_number += 1

        return grouped_state

    def finalize(
        self,
        state: ProtocolState,
        observable_set: ObservableSet,
    ) -> Estimates:
        """Compute final estimates from collected data.

        For grouped measurements, each observable's estimate is computed
        from the same bitstrings as other observables in its group.

        Args:
            state: Final protocol state.
            observable_set: Set of observables (for reference).

        Returns:
            Estimates for all observables.
        """
        grouped_state = state
        if not isinstance(grouped_state, DirectGroupedState):
            raise TypeError("Expected DirectGroupedState")

        estimates = []

        # Build mapping from observable_id to group
        obs_to_group = {}
        for group in grouped_state.groups:
            for obs in group.observables:
                obs_to_group[obs.observable_id] = group

        for obs in observable_set.observables:
            group = obs_to_group.get(obs.observable_id)

            if group is None:
                # Observable was never assigned to any group: no data,
                # report an uninformative estimate with infinite SE.
                estimate = ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=0.0,
                    se=float("inf"),
                    n_shots=0,
                    n_settings=0,
                )
            else:
                bitstrings = grouped_state.group_bitstrings.get(group.group_id, [])

                if not bitstrings:
                    # Group exists but produced no outcomes (e.g. zero budget).
                    estimate = ObservableEstimate(
                        observable_id=obs.observable_id,
                        estimate=0.0,
                        se=float("inf"),
                        n_shots=0,
                        n_settings=1,
                    )
                else:
                    # Compute expectation value from shared bitstrings
                    expectation, se = self._estimate_from_bitstrings(
                        bitstrings,
                        obs.pauli_string,
                        group.measurement_basis,
                        obs.coefficient,
                    )

                    estimate = ObservableEstimate(
                        observable_id=obs.observable_id,
                        estimate=expectation,
                        se=se,
                        n_shots=len(bitstrings),
                        n_settings=1,
                        metadata={"group_id": group.group_id},
                    )

            estimates.append(estimate)

        return Estimates(
            estimates=estimates,
            protocol_id=self.protocol_id,
            protocol_version=self.protocol_version,
            total_shots=state.total_budget - state.remaining_budget,
            metadata={
                "n_groups": len(grouped_state.groups),
                "grouping_method": grouped_state.grouping_method,
            },
        )

    def _estimate_from_bitstrings(
        self,
        bitstrings: list[str],
        pauli_string: str,
        measurement_basis: str,
        coefficient: float,
    ) -> tuple[float, float]:
        """Estimate expectation value from grouped measurement bitstrings.

        When measuring in a shared basis, we need to consider which qubits
        are relevant for each observable. The eigenvalue for observable P
        is determined by the parity of outcomes on P's support.

        Note: ``measurement_basis`` is accepted for interface symmetry but
        is not referenced in the body — only the support parity matters.

        Args:
            bitstrings: List of measurement outcome bitstrings.
            pauli_string: The Pauli observable being estimated.
            measurement_basis: The shared measurement basis.
            coefficient: Observable coefficient.

        Returns:
            Tuple of (expectation value, standard error).
        """
        if not bitstrings:
            return 0.0, float("inf")

        # Get positions where the observable has non-identity operators
        support = [i for i, p in enumerate(pauli_string) if p != "I"]

        # Vectorized eigenvalue computation (numpy, no Python loop)
        # ASCII bytes '0'/'1' minus 48 give the bit values; parity over the
        # support maps to the ±1 eigenvalue.
        bs_array = np.frombuffer(
            "".join(bitstrings).encode(), dtype=np.uint8
        ).reshape(len(bitstrings), -1)[:, support]
        parities = (bs_array - 48).sum(axis=1) % 2
        eigenvalues_array = np.where(parities == 0, 1.0, -1.0)
        mean = float(np.mean(eigenvalues_array)) * coefficient
        # NOTE(review): ddof=1 yields NaN std (hence NaN se) when only a
        # single shot is present — confirm callers guard against n_shots == 1.
        std = float(np.std(eigenvalues_array, ddof=1))
        se = std / np.sqrt(len(eigenvalues_array)) * abs(coefficient)

        return mean, se

__init__(grouping_method='greedy')

Initialize protocol.

Parameters:

Name Type Description Default
grouping_method str

"greedy" or "sorted_insertion"

'greedy'
Source code in src/quartumse/protocols/baselines/direct_grouped.py
73
74
75
76
77
78
79
80
def __init__(self, grouping_method: str = "greedy") -> None:
    """Create the protocol with the chosen partitioning strategy.

    Args:
        grouping_method: "greedy" or "sorted_insertion"
    """
    super().__init__()
    # Instance attribute shadows the class-level default.
    self.grouping_method = grouping_method

finalize(state, observable_set)

Compute final estimates from collected data.

For grouped measurements, each observable's estimate is computed from the same bitstrings as other observables in its group.

Parameters:

Name Type Description Default
state ProtocolState

Final protocol state.

required
observable_set ObservableSet

Set of observables (for reference).

required

Returns:

Type Description
Estimates

Estimates for all observables.

Source code in src/quartumse/protocols/baselines/direct_grouped.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
def finalize(
    self,
    state: ProtocolState,
    observable_set: ObservableSet,
) -> Estimates:
    """Produce per-observable estimates from the grouped measurement data.

    Every observable in a commuting group is estimated from that group's
    shared pool of bitstrings.

    Args:
        state: Final protocol state.
        observable_set: Set of observables (for reference).

    Returns:
        Estimates for all observables.
    """
    if not isinstance(state, DirectGroupedState):
        raise TypeError("Expected DirectGroupedState")

    # Map each observable id to the group that owns it.
    owning_group = {
        obs.observable_id: group
        for group in state.groups
        for obs in group.observables
    }

    results = []
    for obs in observable_set.observables:
        group = owning_group.get(obs.observable_id)

        if group is None:
            # Never assigned to a group: no data at all.
            results.append(
                ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=0.0,
                    se=float("inf"),
                    n_shots=0,
                    n_settings=1 if False else 0,
                )
            )
            continue

        shots = state.group_bitstrings.get(group.group_id, [])
        if not shots:
            # Group exists but produced no outcomes.
            results.append(
                ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=0.0,
                    se=float("inf"),
                    n_shots=0,
                    n_settings=1,
                )
            )
            continue

        # Expectation and SE derived from the shared bitstring pool.
        value, stderr = self._estimate_from_bitstrings(
            shots,
            obs.pauli_string,
            group.measurement_basis,
            obs.coefficient,
        )
        results.append(
            ObservableEstimate(
                observable_id=obs.observable_id,
                estimate=value,
                se=stderr,
                n_shots=len(shots),
                n_settings=1,
                metadata={"group_id": group.group_id},
            )
        )

    return Estimates(
        estimates=results,
        protocol_id=self.protocol_id,
        protocol_version=self.protocol_version,
        total_shots=state.total_budget - state.remaining_budget,
        metadata={
            "n_groups": len(state.groups),
            "grouping_method": state.grouping_method,
        },
    )

initialize(observable_set, total_budget, seed)

Initialize protocol state with commuting groups.

Parameters:

Name Type Description Default
observable_set ObservableSet

Set of observables to estimate.

required
total_budget int

Total number of shots available.

required
seed int

Random seed for reproducibility.

required

Returns:

Type Description
DirectGroupedState

Initialized DirectGroupedState.

Source code in src/quartumse/protocols/baselines/direct_grouped.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def initialize(
    self,
    observable_set: ObservableSet,
    total_budget: int,
    seed: int,
) -> DirectGroupedState:
    """Build the initial state by partitioning observables into commuting groups.

    Args:
        observable_set: Set of observables to estimate.
        total_budget: Total number of shots available.
        seed: Random seed for reproducibility.

    Returns:
        Initialized DirectGroupedState.
    """
    groups, stats = partition_observable_set(observable_set, method=self.grouping_method)

    n_groups = len(groups)
    # Split the budget evenly; guarantee at least one shot per group.
    per_group = 0 if n_groups == 0 else max(1, total_budget // n_groups)

    return DirectGroupedState(
        observable_set=observable_set,
        total_budget=total_budget,
        remaining_budget=total_budget,
        seed=seed,
        n_rounds=0,
        groups=groups,
        shots_per_group=per_group,
        # Empty per-group outcome storage, keyed by group_id.
        group_bitstrings={group.group_id: [] for group in groups},
        grouping_method=self.grouping_method,
        metadata={
            "protocol_id": self.protocol_id,
            "n_groups": n_groups,
            "grouping_stats": stats,
        },
    )

plan(state)

Generate measurement plan with one setting per group.

Parameters:

Name Type Description Default
state ProtocolState

Current protocol state.

required

Returns:

Type Description
MeasurementPlan

MeasurementPlan with G settings, one per commuting group.

Source code in src/quartumse/protocols/baselines/direct_grouped.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def plan(
    self,
    state: ProtocolState,
) -> MeasurementPlan:
    """Build a plan containing one measurement setting per commuting group.

    Args:
        state: Current protocol state.

    Returns:
        MeasurementPlan with G settings, one per commuting group.
    """
    if not isinstance(state, DirectGroupedState):
        raise TypeError("Expected DirectGroupedState")

    n_qubits = state.observable_set.n_qubits
    all_settings = []
    all_shots = []
    obs_map: dict[str, list[int]] = {}

    for idx, group in enumerate(state.groups):
        # One shared-basis setting covers the whole group.
        all_settings.append(
            MeasurementSetting(
                setting_id=group.group_id,
                measurement_basis=group.measurement_basis,
                target_qubits=list(range(n_qubits)),
                metadata={
                    "group_size": group.size,
                    "observable_ids": [o.observable_id for o in group.observables],
                },
            )
        )
        all_shots.append(state.shots_per_group)

        # Every observable in the group resolves to this single setting.
        for o in group.observables:
            obs_map[o.observable_id] = [idx]

    return MeasurementPlan(
        settings=all_settings,
        shots_per_setting=all_shots,
        observable_setting_map=obs_map,
        metadata={
            "n_groups": len(state.groups),
            "grouping_method": state.grouping_method,
        },
    )

update(state, data_chunk)

Update state with new measurement data.

Parameters:

Name Type Description Default
state ProtocolState

Current protocol state.

required
data_chunk RawDatasetChunk

New measurement data.

required

Returns:

Type Description
ProtocolState

Updated protocol state.

Source code in src/quartumse/protocols/baselines/direct_grouped.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def update(
    self,
    state: ProtocolState,
    data_chunk: RawDatasetChunk,
) -> ProtocolState:
    """Fold a chunk of measurement outcomes into the protocol state.

    Args:
        state: Current protocol state.
        data_chunk: New measurement data.

    Returns:
        Updated protocol state.
    """
    if not isinstance(state, DirectGroupedState):
        raise TypeError("Expected DirectGroupedState")

    # Append each setting's outcomes to its group's pool, tallying shots
    # as we go (setting_id is the group_id, as assigned in plan()).
    consumed = 0
    for setting_id, outcomes in data_chunk.bitstrings.items():
        state.group_bitstrings[setting_id].extend(outcomes)
        consumed += len(outcomes)

    state.remaining_budget -= consumed
    state.round_number += 1

    return state

DirectNaiveProtocol

Bases: StaticProtocol

Direct measurement without grouping (§4.1A).

Each observable is measured independently in its native basis. This is the simplest possible approach and serves as a baseline.

Attributes:

Name Type Description
protocol_id str

"direct_naive"

protocol_version str

"1.0.0"

Source code in src/quartumse/protocols/baselines/direct_naive.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
@register_protocol
class DirectNaiveProtocol(StaticProtocol):
    """Direct measurement without grouping (§4.1A).

    Each observable is measured independently in its native basis.
    This is the simplest possible approach and serves as a baseline.

    Attributes:
        protocol_id: "direct_naive"
        protocol_version: "1.0.0"
    """

    protocol_id: str = "direct_naive"
    protocol_version: str = "1.0.0"

    def initialize(
        self,
        observable_set: ObservableSet,
        total_budget: int,
        seed: int,
    ) -> DirectNaiveState:
        """Initialize protocol state.

        Args:
            observable_set: Set of observables to estimate.
            total_budget: Total number of shots available.
            seed: Random seed for reproducibility.

        Returns:
            Initialized DirectNaiveState.
        """
        M = len(observable_set)
        # Ensure at least 1 shot per observable (if budget allows)
        # NOTE(review): with M > total_budget this over-allocates (M shots
        # total); confirm upstream budget checks prevent that.
        shots_per_observable = max(1, total_budget // M) if M > 0 else 0

        # Initialize storage for each observable
        observable_bitstrings = {obs.observable_id: [] for obs in observable_set.observables}

        return DirectNaiveState(
            observable_set=observable_set,
            total_budget=total_budget,
            remaining_budget=total_budget,
            seed=seed,
            n_rounds=0,
            shots_per_observable=shots_per_observable,
            observable_bitstrings=observable_bitstrings,
            metadata={"protocol_id": self.protocol_id},
        )

    def plan(
        self,
        state: ProtocolState,
    ) -> MeasurementPlan:
        """Generate measurement plan for all observables.

        Each observable gets its own measurement setting in its native basis.

        Args:
            state: Current protocol state.

        Returns:
            MeasurementPlan with M settings, one per observable.
        """
        direct_state = state
        if not isinstance(direct_state, DirectNaiveState):
            raise TypeError("Expected DirectNaiveState")

        settings = []
        shots_per_setting = []
        observable_setting_map: dict[str, list[int]] = {}

        for i, obs in enumerate(state.observable_set.observables):
            # Native measurement basis is the Pauli string with I -> Z
            basis = obs.pauli_string.replace("I", "Z")

            # setting_id format "setting_{i}" is parsed back in update(),
            # so the two methods must stay in sync.
            setting = MeasurementSetting(
                setting_id=f"setting_{i}",
                measurement_basis=basis,
                target_qubits=list(range(obs.n_qubits)),
                metadata={"observable_id": obs.observable_id},
            )
            settings.append(setting)
            shots_per_setting.append(direct_state.shots_per_observable)
            observable_setting_map[obs.observable_id] = [i]

        return MeasurementPlan(
            settings=settings,
            shots_per_setting=shots_per_setting,
            observable_setting_map=observable_setting_map,
            metadata={"n_observables": len(state.observable_set)},
        )

    def update(
        self,
        state: ProtocolState,
        data_chunk: RawDatasetChunk,
    ) -> ProtocolState:
        """Update state with new measurement data.

        Args:
            state: Current protocol state.
            data_chunk: New measurement data.

        Returns:
            Updated protocol state.
        """
        direct_state = state
        if not isinstance(direct_state, DirectNaiveState):
            raise TypeError("Expected DirectNaiveState")

        # Store bitstrings for each setting
        for setting_id, bitstrings in data_chunk.bitstrings.items():
            # Find which observable this setting corresponds to
            # (relies on the "setting_{i}" id format produced by plan()).
            setting_idx = int(setting_id.split("_")[1])
            obs = direct_state.observable_set.observables[setting_idx]
            direct_state.observable_bitstrings[obs.observable_id].extend(bitstrings)

        # Update budget tracking
        total_new_shots = sum(len(bs) for bs in data_chunk.bitstrings.values())
        direct_state.remaining_budget -= total_new_shots
        # NOTE(review): initialize() sets ``n_rounds=0`` but this increments
        # ``round_number`` — confirm ProtocolState defines/aliases both fields.
        direct_state.round_number += 1

        return direct_state

    def finalize(
        self,
        state: ProtocolState,
        observable_set: ObservableSet,
    ) -> Estimates:
        """Compute final estimates from collected data.

        Args:
            state: Final protocol state.
            observable_set: Set of observables (for reference).

        Returns:
            Estimates for all observables.
        """
        direct_state = state
        if not isinstance(direct_state, DirectNaiveState):
            raise TypeError("Expected DirectNaiveState")

        estimates = []

        for obs in observable_set.observables:
            bitstrings = direct_state.observable_bitstrings.get(obs.observable_id, [])

            if not bitstrings:
                # No data collected
                estimate = ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=0.0,
                    se=float("inf"),
                    n_shots=0,
                    n_settings=0,
                )
            else:
                # Compute expectation value from bitstrings
                expectation, se = self._estimate_from_bitstrings(
                    bitstrings, obs.pauli_string, obs.coefficient
                )

                estimate = ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=expectation,
                    se=se,
                    n_shots=len(bitstrings),
                    n_settings=1,
                )

            estimates.append(estimate)

        return Estimates(
            estimates=estimates,
            protocol_id=self.protocol_id,
            protocol_version=self.protocol_version,
            total_shots=state.total_budget - state.remaining_budget,
            metadata={"n_observables": len(observable_set)},
        )

    def _estimate_from_bitstrings(
        self,
        bitstrings: list[str],
        pauli_string: str,
        coefficient: float,
    ) -> tuple[float, float]:
        """Estimate expectation value from measurement bitstrings.

        For a Pauli string P = P_1 ⊗ P_2 ⊗ ... ⊗ P_n, the expectation
        value is estimated as the mean of (-1)^(parity) where parity
        counts the number of 1s on qubits where P_i ≠ I.

        Args:
            bitstrings: List of measurement outcome bitstrings.
            pauli_string: The Pauli operator being measured.
            coefficient: Observable coefficient.

        Returns:
            Tuple of (expectation value, standard error).
        """
        if not bitstrings:
            return 0.0, float("inf")

        # Get positions where we have non-identity Paulis
        support = [i for i, p in enumerate(pauli_string) if p != "I"]

        # Vectorised eigenvalue computation:
        # Convert bitstrings to a 2-D uint8 array, slice support columns,
        # then compute parity and eigenvalues without a Python loop.
        bs_array = np.frombuffer(
            "".join(bitstrings).encode(), dtype=np.uint8
        ).reshape(len(bitstrings), -1)[:, support]
        # ASCII '1' = 49; subtract ord('0')=48 so '0'->0, '1'->1
        parities = (bs_array - 48).sum(axis=1) % 2
        eigenvalues_array = np.where(parities == 0, 1.0, -1.0)

        # Compute mean and standard error
        mean = float(np.mean(eigenvalues_array)) * coefficient
        # NOTE(review): ddof=1 yields NaN std (hence NaN se) when only a
        # single shot is present — confirm callers guard against n_shots == 1.
        std = float(np.std(eigenvalues_array, ddof=1))
        se = std / np.sqrt(len(eigenvalues_array)) * abs(coefficient)

        return mean, se

finalize(state, observable_set)

Compute final estimates from collected data.

Parameters:

Name Type Description Default
state ProtocolState

Final protocol state.

required
observable_set ObservableSet

Set of observables (for reference).

required

Returns:

Type Description
Estimates

Estimates for all observables.

Source code in src/quartumse/protocols/baselines/direct_naive.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def finalize(
    self,
    state: ProtocolState,
    observable_set: ObservableSet,
) -> Estimates:
    """Produce per-observable estimates from the accumulated bitstrings.

    Args:
        state: Final protocol state (must be a DirectNaiveState).
        observable_set: Set of observables (for reference).

    Returns:
        Estimates for all observables.
    """
    if not isinstance(state, DirectNaiveState):
        raise TypeError("Expected DirectNaiveState")

    results: list[ObservableEstimate] = []
    for obs in observable_set.observables:
        data = state.observable_bitstrings.get(obs.observable_id, [])
        if data:
            value, stderr = self._estimate_from_bitstrings(
                data, obs.pauli_string, obs.coefficient
            )
            results.append(
                ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=value,
                    se=stderr,
                    n_shots=len(data),
                    n_settings=1,
                )
            )
        else:
            # No measurements collected for this observable: report an
            # uninformative estimate with infinite standard error.
            results.append(
                ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=0.0,
                    se=float("inf"),
                    n_shots=0,
                    n_settings=0,
                )
            )

    return Estimates(
        estimates=results,
        protocol_id=self.protocol_id,
        protocol_version=self.protocol_version,
        total_shots=state.total_budget - state.remaining_budget,
        metadata={"n_observables": len(observable_set)},
    )

initialize(observable_set, total_budget, seed)

Initialize protocol state.

Parameters:

Name Type Description Default
observable_set ObservableSet

Set of observables to estimate.

required
total_budget int

Total number of shots available.

required
seed int

Random seed for reproducibility.

required

Returns:

Type Description
DirectNaiveState

Initialized DirectNaiveState.

Source code in src/quartumse/protocols/baselines/direct_naive.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def initialize(
    self,
    observable_set: ObservableSet,
    total_budget: int,
    seed: int,
) -> DirectNaiveState:
    """Create fresh protocol state with a uniform per-observable shot split.

    Args:
        observable_set: Set of observables to estimate.
        total_budget: Total number of shots available.
        seed: Random seed for reproducibility.

    Returns:
        Initialized DirectNaiveState.
    """
    n_obs = len(observable_set)
    if n_obs > 0:
        # Uniform split; guarantee every observable at least one shot
        # even when the budget is smaller than the observable count.
        per_obs_shots = max(1, total_budget // n_obs)
    else:
        per_obs_shots = 0

    return DirectNaiveState(
        observable_set=observable_set,
        total_budget=total_budget,
        remaining_budget=total_budget,
        seed=seed,
        n_rounds=0,
        shots_per_observable=per_obs_shots,
        observable_bitstrings={
            obs.observable_id: [] for obs in observable_set.observables
        },
        metadata={"protocol_id": self.protocol_id},
    )

plan(state)

Generate measurement plan for all observables.

Each observable gets its own measurement setting in its native basis.

Parameters:

Name Type Description Default
state ProtocolState

Current protocol state.

required

Returns:

Type Description
MeasurementPlan

MeasurementPlan with M settings, one per observable.

Source code in src/quartumse/protocols/baselines/direct_naive.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def plan(
    self,
    state: ProtocolState,
) -> MeasurementPlan:
    """Build one measurement setting per observable in its native basis.

    Args:
        state: Current protocol state (must be a DirectNaiveState).

    Returns:
        MeasurementPlan with M settings, one per observable.
    """
    if not isinstance(state, DirectNaiveState):
        raise TypeError("Expected DirectNaiveState")

    settings: list[MeasurementSetting] = []
    shots: list[int] = []
    obs_to_setting: dict[str, list[int]] = {}

    for idx, obs in enumerate(state.observable_set.observables):
        # Identity factors are measured in the Z basis; the estimator
        # only reads bits on the observable's non-identity support.
        native_basis = obs.pauli_string.replace("I", "Z")
        settings.append(
            MeasurementSetting(
                setting_id=f"setting_{idx}",
                measurement_basis=native_basis,
                target_qubits=list(range(obs.n_qubits)),
                metadata={"observable_id": obs.observable_id},
            )
        )
        shots.append(state.shots_per_observable)
        obs_to_setting[obs.observable_id] = [idx]

    return MeasurementPlan(
        settings=settings,
        shots_per_setting=shots,
        observable_setting_map=obs_to_setting,
        metadata={"n_observables": len(state.observable_set)},
    )

update(state, data_chunk)

Update state with new measurement data.

Parameters:

Name Type Description Default
state ProtocolState

Current protocol state.

required
data_chunk RawDatasetChunk

New measurement data.

required

Returns:

Type Description
ProtocolState

Updated protocol state.

Source code in src/quartumse/protocols/baselines/direct_naive.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
def update(
    self,
    state: ProtocolState,
    data_chunk: RawDatasetChunk,
) -> ProtocolState:
    """Fold a chunk of measurement outcomes into the protocol state.

    Args:
        state: Current protocol state (must be a DirectNaiveState).
        data_chunk: New measurement data.

    Returns:
        Updated protocol state (mutated in place).
    """
    if not isinstance(state, DirectNaiveState):
        raise TypeError("Expected DirectNaiveState")

    consumed = 0
    for setting_id, outcomes in data_chunk.bitstrings.items():
        # Setting ids are "setting_<i>" as produced by plan(); <i> indexes
        # the observable list directly.
        obs_index = int(setting_id.split("_")[1])
        obs_id = state.observable_set.observables[obs_index].observable_id
        state.observable_bitstrings[obs_id].extend(outcomes)
        consumed += len(outcomes)

    # NOTE(review): initialize() sets `n_rounds` but this increments
    # `round_number` — presumably both exist on ProtocolState; confirm.
    state.remaining_budget -= consumed
    state.round_number += 1

    return state

DirectOptimizedProtocol

Bases: StaticProtocol

Direct measurement with optimal shot allocation (§4.1C).

This protocol: (1) groups observables into qubit-wise commuting families; (2) allocates shots optimally based on group importance/size; (3) uses a simple heuristic: more shots to larger groups.

The optimal allocation aims to minimize the worst-case SE across all observables, subject to the total shot budget constraint.

Attributes:

Name Type Description
protocol_id str

"direct_optimized"

protocol_version str

"1.0.0"

allocation_strategy str

Strategy for shot allocation

Source code in src/quartumse/protocols/baselines/direct_optimized.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
@register_protocol
class DirectOptimizedProtocol(StaticProtocol):
    """Direct measurement with optimal shot allocation (§4.1C).

    This protocol:
    1. Groups observables into qubit-wise commuting families
    2. Allocates shots optimally based on group importance/size
    3. Uses a simple heuristic: more shots to larger groups

    The optimal allocation aims to minimize the worst-case SE across
    all observables, subject to the total shot budget constraint.

    Attributes:
        protocol_id: "direct_optimized"
        protocol_version: "1.0.0"
        allocation_strategy: Strategy for shot allocation
    """

    protocol_id: str = "direct_optimized"
    protocol_version: str = "1.0.0"
    allocation_strategy: str = "proportional"  # or "equal_se", "max_min"

    def __init__(self, allocation_strategy: str = "proportional") -> None:
        """Initialize protocol.

        Args:
            allocation_strategy: Strategy for allocating shots to groups.
                - "proportional": Proportional to sqrt(group_size)
                - "equal_se": Aim for equal SE across observables
                - "max_min": Maximize minimum shots per observable
        """
        super().__init__()
        self.allocation_strategy = allocation_strategy

    def initialize(
        self,
        observable_set: ObservableSet,
        total_budget: int,
        seed: int,
    ) -> DirectOptimizedState:
        """Initialize protocol state with optimal allocation.

        Args:
            observable_set: Set of observables to estimate.
            total_budget: Total number of shots available.
            seed: Random seed for reproducibility.

        Returns:
            Initialized DirectOptimizedState.
        """
        # Partition into qubit-wise commuting groups (greedy heuristic).
        groups, stats = partition_observable_set(observable_set, method="greedy")

        # Compute shot allocation across groups per the configured strategy.
        shots_per_group, allocation_weights = self._compute_allocation(groups, total_budget)

        # One bitstring buffer per group, filled incrementally by update().
        group_bitstrings = {g.group_id: [] for g in groups}

        return DirectOptimizedState(
            observable_set=observable_set,
            total_budget=total_budget,
            remaining_budget=total_budget,
            seed=seed,
            n_rounds=0,
            groups=groups,
            shots_per_group=shots_per_group,
            group_bitstrings=group_bitstrings,
            allocation_weights=allocation_weights,
            metadata={
                "protocol_id": self.protocol_id,
                "n_groups": len(groups),
                "allocation_strategy": self.allocation_strategy,
                "grouping_stats": stats,
            },
        )

    def _compute_allocation(
        self,
        groups: list[CommutingGroup],
        total_budget: int,
    ) -> tuple[dict[str, int], dict[str, float]]:
        """Compute optimal shot allocation across groups.

        Args:
            groups: List of commuting groups.
            total_budget: Total shots available.

        Returns:
            Tuple of (shots_per_group dict, allocation_weights dict).
            Weights are normalized to sum to 1. Because every group is
            guaranteed at least one shot, the allocated total can exceed
            ``total_budget`` when the budget is smaller than the number
            of groups.
        """
        if not groups:
            return {}, {}

        weights: dict[str, float] = {}

        if self.allocation_strategy == "proportional":
            # Allocate proportionally to sqrt(group_size).
            # Rationale: larger groups benefit more from shared measurements.
            for group in groups:
                weights[group.group_id] = np.sqrt(group.size)

        elif self.allocation_strategy == "equal_se":
            # SE ~ 1/sqrt(N), so N ~ 1/SE^2. For equal SE across groups
            # with different sizes the weight is again sqrt(group_size)
            # (numerically identical to "proportional").
            for group in groups:
                weights[group.group_id] = np.sqrt(group.size)

        elif self.allocation_strategy == "max_min":
            # Each observable gets N_g / |G_g| effective shots; equalizing
            # that ratio means allocating proportionally to group size.
            for group in groups:
                weights[group.group_id] = float(group.size)

        else:
            # Unknown strategy: fall back to a uniform split.
            for group in groups:
                weights[group.group_id] = 1.0

        # Normalize weights so they sum to 1.
        total_weight = sum(weights.values())
        for gid in weights:
            weights[gid] /= total_weight

        # Allocate shots: all but the last group get their rounded-down
        # weighted share, floored at 1 shot each.
        # BUGFIX: the original wrote
        #   shots = max(1, shots) if total_budget >= n_groups else max(1, shots)
        # — both branches identical, so the conditional was dead code.
        # The one-shot floor is unconditional.
        shots_per_group: dict[str, int] = {}
        allocated = 0

        for group in groups[:-1]:  # All but last
            shots = max(1, int(weights[group.group_id] * total_budget))
            shots_per_group[group.group_id] = shots
            allocated += shots

        # Last group absorbs the remainder to avoid rounding loss, but
        # still receives at least 1 shot. (`groups` is non-empty here
        # thanks to the early return above.)
        shots_per_group[groups[-1].group_id] = max(1, total_budget - allocated)

        return shots_per_group, weights

    def plan(
        self,
        state: ProtocolState,
    ) -> MeasurementPlan:
        """Generate measurement plan with optimal allocation.

        Args:
            state: Current protocol state.

        Returns:
            MeasurementPlan with G settings and optimal shot distribution.
        """
        opt_state = state
        if not isinstance(opt_state, DirectOptimizedState):
            raise TypeError("Expected DirectOptimizedState")

        settings = []
        shots_per_setting = []
        observable_setting_map: dict[str, list[int]] = {}

        for i, group in enumerate(opt_state.groups):
            setting = MeasurementSetting(
                setting_id=group.group_id,
                measurement_basis=group.measurement_basis,
                target_qubits=list(range(opt_state.observable_set.n_qubits)),
                metadata={
                    "group_size": group.size,
                    "allocation_weight": opt_state.allocation_weights[group.group_id],
                    "observable_ids": [obs.observable_id for obs in group.observables],
                },
            )
            settings.append(setting)
            shots_per_setting.append(opt_state.shots_per_group[group.group_id])

            # Map each observable in this group to this setting
            for obs in group.observables:
                observable_setting_map[obs.observable_id] = [i]

        return MeasurementPlan(
            settings=settings,
            shots_per_setting=shots_per_setting,
            observable_setting_map=observable_setting_map,
            metadata={
                "n_groups": len(opt_state.groups),
                "allocation_strategy": self.allocation_strategy,
            },
        )

    def update(
        self,
        state: ProtocolState,
        data_chunk: RawDatasetChunk,
    ) -> ProtocolState:
        """Update state with new measurement data.

        Args:
            state: Current protocol state.
            data_chunk: New measurement data.

        Returns:
            Updated protocol state (mutated in place).
        """
        opt_state = state
        if not isinstance(opt_state, DirectOptimizedState):
            raise TypeError("Expected DirectOptimizedState")

        # Setting ids equal group ids, so they index group_bitstrings directly.
        for setting_id, bitstrings in data_chunk.bitstrings.items():
            opt_state.group_bitstrings[setting_id].extend(bitstrings)

        # Update budget tracking
        total_new_shots = sum(len(bs) for bs in data_chunk.bitstrings.values())
        opt_state.remaining_budget -= total_new_shots
        opt_state.round_number += 1

        return opt_state

    def finalize(
        self,
        state: ProtocolState,
        observable_set: ObservableSet,
    ) -> Estimates:
        """Compute final estimates from collected data.

        Args:
            state: Final protocol state.
            observable_set: Set of observables (for reference).

        Returns:
            Estimates for all observables.
        """
        opt_state = state
        if not isinstance(opt_state, DirectOptimizedState):
            raise TypeError("Expected DirectOptimizedState")

        estimates = []

        # Reverse index: observable_id -> commuting group containing it.
        obs_to_group = {}
        for group in opt_state.groups:
            for obs in group.observables:
                obs_to_group[obs.observable_id] = group

        for obs in observable_set.observables:
            group = obs_to_group.get(obs.observable_id)

            if group is None:
                # Observable never assigned to a group: no data at all.
                estimate = ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=0.0,
                    se=float("inf"),
                    n_shots=0,
                    n_settings=0,
                )
            else:
                bitstrings = opt_state.group_bitstrings.get(group.group_id, [])

                if not bitstrings:
                    # Group was planned but no shots arrived.
                    estimate = ObservableEstimate(
                        observable_id=obs.observable_id,
                        estimate=0.0,
                        se=float("inf"),
                        n_shots=0,
                        n_settings=1,
                    )
                else:
                    # Compute expectation value from shared bitstrings
                    expectation, se = self._estimate_from_bitstrings(
                        bitstrings,
                        obs.pauli_string,
                        group.measurement_basis,
                        obs.coefficient,
                    )

                    estimate = ObservableEstimate(
                        observable_id=obs.observable_id,
                        estimate=expectation,
                        se=se,
                        n_shots=len(bitstrings),
                        n_settings=1,
                        metadata={
                            "group_id": group.group_id,
                            "allocation_weight": opt_state.allocation_weights[group.group_id],
                        },
                    )

            estimates.append(estimate)

        return Estimates(
            estimates=estimates,
            protocol_id=self.protocol_id,
            protocol_version=self.protocol_version,
            total_shots=state.total_budget - state.remaining_budget,
            metadata={
                "n_groups": len(opt_state.groups),
                "allocation_strategy": self.allocation_strategy,
            },
        )

    def _estimate_from_bitstrings(
        self,
        bitstrings: list[str],
        pauli_string: str,
        measurement_basis: str,
        coefficient: float,
    ) -> tuple[float, float]:
        """Estimate expectation value from grouped measurement bitstrings.

        Args:
            bitstrings: List of measurement outcome bitstrings.
            pauli_string: The Pauli observable being estimated.
            measurement_basis: The shared measurement basis. Currently
                unused — the eigenvalue is read directly off the bits on
                the observable's support; kept for interface stability.
            coefficient: Observable coefficient.

        Returns:
            Tuple of (expectation value, standard error).
        """
        if not bitstrings:
            return 0.0, float("inf")

        # Get positions where the observable has non-identity operators
        support = [i for i, p in enumerate(pauli_string) if p != "I"]

        # Vectorized eigenvalue computation (numpy, no Python loop):
        # pack all bitstrings into one uint8 matrix, slice the support
        # columns, then map each row's bit parity to an eigenvalue ±1.
        bs_array = np.frombuffer(
            "".join(bitstrings).encode(), dtype=np.uint8
        ).reshape(len(bitstrings), -1)[:, support]
        # ASCII: subtract ord('0')=48 so '0'->0, '1'->1
        parities = (bs_array - 48).sum(axis=1) % 2
        eigenvalues_array = np.where(parities == 0, 1.0, -1.0)
        mean = float(np.mean(eigenvalues_array)) * coefficient
        std = float(np.std(eigenvalues_array, ddof=1))
        se = std / np.sqrt(len(eigenvalues_array)) * abs(coefficient)

        return mean, se

__init__(allocation_strategy='proportional')

Initialize protocol.

Parameters:

Name Type Description Default
allocation_strategy str

Strategy for allocating shots to groups: "proportional" (proportional to sqrt(group_size)), "equal_se" (aim for equal SE across observables), or "max_min" (maximize the minimum shots per observable).

'proportional'
Source code in src/quartumse/protocols/baselines/direct_optimized.py
77
78
79
80
81
82
83
84
85
86
87
def __init__(self, allocation_strategy: str = "proportional") -> None:
    """Create the protocol with a chosen shot-allocation strategy.

    Args:
        allocation_strategy: How shots are divided among commuting
            groups. One of "proportional" (weight ~ sqrt(group_size)),
            "equal_se" (aim for equal SE across observables), or
            "max_min" (maximize minimum shots per observable).
    """
    super().__init__()
    self.allocation_strategy = allocation_strategy

finalize(state, observable_set)

Compute final estimates from collected data.

Parameters:

Name Type Description Default
state ProtocolState

Final protocol state.

required
observable_set ObservableSet

Set of observables (for reference).

required

Returns:

Type Description
Estimates

Estimates for all observables.

Source code in src/quartumse/protocols/baselines/direct_optimized.py
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
def finalize(
    self,
    state: ProtocolState,
    observable_set: ObservableSet,
) -> Estimates:
    """Turn the grouped bitstring data into per-observable estimates.

    Args:
        state: Final protocol state (must be a DirectOptimizedState).
        observable_set: Set of observables (for reference).

    Returns:
        Estimates for all observables.
    """
    if not isinstance(state, DirectOptimizedState):
        raise TypeError("Expected DirectOptimizedState")

    # Reverse index: observable_id -> commuting group containing it.
    group_of = {
        obs.observable_id: group
        for group in state.groups
        for obs in group.observables
    }

    results: list[ObservableEstimate] = []
    for obs in observable_set.observables:
        group = group_of.get(obs.observable_id)

        if group is None:
            # Observable was never assigned to a group: no data at all.
            results.append(
                ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=0.0,
                    se=float("inf"),
                    n_shots=0,
                    n_settings=0,
                )
            )
            continue

        data = state.group_bitstrings.get(group.group_id, [])
        if not data:
            # Group was planned but no shots were collected.
            results.append(
                ObservableEstimate(
                    observable_id=obs.observable_id,
                    estimate=0.0,
                    se=float("inf"),
                    n_shots=0,
                    n_settings=1,
                )
            )
            continue

        # Estimate from the group's shared measurement outcomes.
        value, stderr = self._estimate_from_bitstrings(
            data,
            obs.pauli_string,
            group.measurement_basis,
            obs.coefficient,
        )
        results.append(
            ObservableEstimate(
                observable_id=obs.observable_id,
                estimate=value,
                se=stderr,
                n_shots=len(data),
                n_settings=1,
                metadata={
                    "group_id": group.group_id,
                    "allocation_weight": state.allocation_weights[group.group_id],
                },
            )
        )

    return Estimates(
        estimates=results,
        protocol_id=self.protocol_id,
        protocol_version=self.protocol_version,
        total_shots=state.total_budget - state.remaining_budget,
        metadata={
            "n_groups": len(state.groups),
            "allocation_strategy": self.allocation_strategy,
        },
    )

initialize(observable_set, total_budget, seed)

Initialize protocol state with optimal allocation.

Parameters:

Name Type Description Default
observable_set ObservableSet

Set of observables to estimate.

required
total_budget int

Total number of shots available.

required
seed int

Random seed for reproducibility.

required

Returns:

Type Description
DirectOptimizedState

Initialized DirectOptimizedState.

Source code in src/quartumse/protocols/baselines/direct_optimized.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def initialize(
    self,
    observable_set: ObservableSet,
    total_budget: int,
    seed: int,
) -> DirectOptimizedState:
    """Group the observables and allocate the shot budget across groups.

    Args:
        observable_set: Set of observables to estimate.
        total_budget: Total number of shots available.
        seed: Random seed for reproducibility.

    Returns:
        Initialized DirectOptimizedState.
    """
    # Qubit-wise commuting partition via the greedy heuristic.
    groups, grouping_stats = partition_observable_set(observable_set, method="greedy")

    # Shot allocation per the configured strategy.
    shots_per_group, weights = self._compute_allocation(groups, total_budget)

    return DirectOptimizedState(
        observable_set=observable_set,
        total_budget=total_budget,
        remaining_budget=total_budget,
        seed=seed,
        n_rounds=0,
        groups=groups,
        shots_per_group=shots_per_group,
        # One bitstring buffer per group, filled incrementally by update().
        group_bitstrings={g.group_id: [] for g in groups},
        allocation_weights=weights,
        metadata={
            "protocol_id": self.protocol_id,
            "n_groups": len(groups),
            "allocation_strategy": self.allocation_strategy,
            "grouping_stats": grouping_stats,
        },
    )

plan(state)

Generate measurement plan with optimal allocation.

Parameters:

Name Type Description Default
state ProtocolState

Current protocol state.

required

Returns:

Type Description
MeasurementPlan

MeasurementPlan with G settings and optimal shot distribution.

Source code in src/quartumse/protocols/baselines/direct_optimized.py
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def plan(
    self,
    state: ProtocolState,
) -> MeasurementPlan:
    """Build one measurement setting per commuting group.

    Args:
        state: Current protocol state (must be a DirectOptimizedState).

    Returns:
        MeasurementPlan with G settings and optimal shot distribution.
    """
    if not isinstance(state, DirectOptimizedState):
        raise TypeError("Expected DirectOptimizedState")

    settings: list[MeasurementSetting] = []
    shots: list[int] = []
    obs_to_setting: dict[str, list[int]] = {}

    n_qubits = state.observable_set.n_qubits
    for idx, group in enumerate(state.groups):
        settings.append(
            MeasurementSetting(
                setting_id=group.group_id,
                measurement_basis=group.measurement_basis,
                target_qubits=list(range(n_qubits)),
                metadata={
                    "group_size": group.size,
                    "allocation_weight": state.allocation_weights[group.group_id],
                    "observable_ids": [o.observable_id for o in group.observables],
                },
            )
        )
        shots.append(state.shots_per_group[group.group_id])

        # Every observable in the group shares this single setting.
        for o in group.observables:
            obs_to_setting[o.observable_id] = [idx]

    return MeasurementPlan(
        settings=settings,
        shots_per_setting=shots,
        observable_setting_map=obs_to_setting,
        metadata={
            "n_groups": len(state.groups),
            "allocation_strategy": self.allocation_strategy,
        },
    )

update(state, data_chunk)

Update state with new measurement data.

Parameters:

Name Type Description Default
state ProtocolState

Current protocol state.

required
data_chunk RawDatasetChunk

New measurement data.

required

Returns:

Type Description
ProtocolState

Updated protocol state.

Source code in src/quartumse/protocols/baselines/direct_optimized.py
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def update(
    self,
    state: ProtocolState,
    data_chunk: RawDatasetChunk,
) -> ProtocolState:
    """Fold a chunk of measurement outcomes into the protocol state.

    Args:
        state: Current protocol state (must be a DirectOptimizedState).
        data_chunk: New measurement data.

    Returns:
        Updated protocol state (mutated in place).
    """
    if not isinstance(state, DirectOptimizedState):
        raise TypeError("Expected DirectOptimizedState")

    consumed = 0
    for setting_id, outcomes in data_chunk.bitstrings.items():
        # Setting ids equal group ids, so they index group_bitstrings directly.
        state.group_bitstrings[setting_id].extend(outcomes)
        consumed += len(outcomes)

    state.remaining_budget -= consumed
    state.round_number += 1

    return state

Estimates dataclass

Complete estimation results for all observables (§5.3).

Supports both list-based and dict-based storage of estimates.

Attributes:

Name Type Description
estimates list[ObservableEstimate]

List of ObservableEstimate objects.

observable_estimates dict[str, ObservableEstimate]

Dict mapping observable_id to ObservableEstimate (computed).

total_shots int

Total shots used for estimation.

n_settings int

Number of distinct measurement settings used.

time_quantum_s float | None

Quantum execution time in seconds.

time_classical_s float | None

Classical processing time in seconds.

protocol_id str | None

ID of the protocol that produced these estimates.

protocol_version str | None

Version of the protocol.

ci_method_id str | None

CI method used (if uniform across observables).

metadata dict[str, Any]

Additional metadata.

Source code in src/quartumse/protocols/state.py
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
@dataclass
class Estimates:
    """Complete estimation results for all observables (§5.3).

    Supports both list-based and dict-based storage of estimates.

    Attributes:
        estimates: List of ObservableEstimate objects.
        observable_estimates: Dict mapping observable_id to ObservableEstimate (computed).
        total_shots: Total shots used for estimation.
        n_settings: Number of distinct measurement settings used.
        time_quantum_s: Quantum execution time in seconds.
        time_classical_s: Classical processing time in seconds.
        timing_breakdown: Optional fine-grained timing breakdown.
        timed_out: Whether the producing run hit a timeout.
        n_shots_completed: Shots actually completed (relevant when timed_out).
        protocol_id: ID of the protocol that produced these estimates.
        protocol_version: Version of the protocol.
        ci_method_id: CI method used (if uniform across observables).
        metadata: Additional metadata.
        raw_chunks: Optional raw shot-level data for post-hoc analysis.
    """

    # Primary storage as list (used by protocols)
    estimates: list[ObservableEstimate] = field(default_factory=list)

    # Optional: total shots and settings
    total_shots: int = 0
    n_settings: int = 0

    # Timing
    time_quantum_s: float | None = None
    time_classical_s: float | None = None
    timing_breakdown: TimingBreakdown | None = None

    # Timeout
    timed_out: bool = False
    n_shots_completed: int | None = None

    # Protocol info
    protocol_id: str | None = None
    protocol_version: str | None = None
    ci_method_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    # Optional: raw shot-level data for post-hoc analysis (e.g., pilot subsampling)
    raw_chunks: list[RawDatasetChunk] | None = field(default=None, repr=False)

    @property
    def observable_estimates(self) -> dict[str, ObservableEstimate]:
        """Dict view mapping observable_id to ObservableEstimate."""
        return {est.observable_id: est for est in self.estimates}

    @property
    def n_observables(self) -> int:
        """Number of observables estimated."""
        return len(self.estimates)

    def get_estimate(self, observable_id: str) -> ObservableEstimate:
        """Get estimate for a specific observable."""
        for est in self.estimates:
            if est.observable_id == observable_id:
                return est
        raise KeyError(f"Observable {observable_id} not found in estimates")

    def max_se(self) -> float:
        """Maximum standard error across all observables."""
        if not self.estimates:
            return float("inf")
        return max(est.se for est in self.estimates)

    def mean_se(self) -> float:
        """Mean standard error across all observables.

        Returns ``float("inf")`` when there are no estimates, mirroring
        ``max_se`` instead of raising ``ZeroDivisionError``.
        """
        ses = [est.se for est in self.observable_estimates.values()]
        if not ses:
            return float("inf")
        return sum(ses) / len(ses)

    def max_ci_half_width(self) -> float | None:
        """Maximum CI half-width across all observables."""
        half_widths = [
            est.ci.half_width for est in self.observable_estimates.values() if est.ci is not None
        ]
        return max(half_widths) if half_widths else None

    def all_within_target(self, epsilon: float, use_ci: bool = True) -> bool:
        """Check if all observables meet precision target.

        Args:
            epsilon: Target precision (SE or CI half-width).
            use_ci: If True, check CI half-width; otherwise check SE.

        Returns:
            True if all observables meet the target.
        """
        for est in self.observable_estimates.values():
            if use_ci and est.ci is not None:
                if est.ci.half_width > epsilon:
                    return False
            elif est.se > epsilon:
                return False
        return True

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "observable_estimates": {
                obs_id: {
                    "observable_id": est.observable_id,
                    "estimate": est.estimate,
                    "se": est.se,
                    "variance": est.variance,
                    "ci": (
                        {
                            "ci_low": est.ci.ci_low,
                            "ci_high": est.ci.ci_high,
                            "ci_low_raw": est.ci.ci_low_raw,
                            "ci_high_raw": est.ci.ci_high_raw,
                            "confidence_level": est.ci.confidence_level,
                            "method": est.ci.method.value,
                            "clamped": est.ci.clamped,
                        }
                        if est.ci
                        else None
                    ),
                    "effective_sample_size": est.effective_sample_size,
                    "diagnostics": est.diagnostics,
                }
                for obs_id, est in self.observable_estimates.items()
            },
            "n_observables": self.n_observables,
            "total_shots": self.total_shots,
            "n_settings": self.n_settings,
            "time_quantum_s": self.time_quantum_s,
            "time_classical_s": self.time_classical_s,
            "protocol_id": self.protocol_id,
            "protocol_version": self.protocol_version,
            "ci_method_id": self.ci_method_id,
            "metadata": self.metadata,
        }

n_observables property

Number of observables estimated.

observable_estimates property

Dict view mapping observable_id to ObservableEstimate.

all_within_target(epsilon, use_ci=True)

Check if all observables meet precision target.

Parameters:

Name Type Description Default
epsilon float

Target precision (SE or CI half-width).

required
use_ci bool

If True, check CI half-width; otherwise check SE.

True

Returns:

Type Description
bool

True if all observables meet the target.

Source code in src/quartumse/protocols/state.py
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
def all_within_target(self, epsilon: float, use_ci: bool = True) -> bool:
    """Check if all observables meet precision target.

    Args:
        epsilon: Target precision (SE or CI half-width).
        use_ci: If True, check CI half-width; otherwise check SE.

    Returns:
        True if all observables meet the target.
    """
    for est in self.observable_estimates.values():
        # Prefer the CI half-width when requested and available; otherwise
        # fall back to the standard error as the precision proxy.
        if use_ci and est.ci is not None:
            bound = est.ci.half_width
        else:
            bound = est.se
        if bound > epsilon:
            return False
    return True

get_estimate(observable_id)

Get estimate for a specific observable.

Source code in src/quartumse/protocols/state.py
386
387
388
389
390
391
def get_estimate(self, observable_id: str) -> ObservableEstimate:
    """Get estimate for a specific observable."""
    for est in self.estimates:
        if est.observable_id == observable_id:
            return est
    raise KeyError(f"Observable {observable_id} not found in estimates")

max_ci_half_width()

Maximum CI half-width across all observables.

Source code in src/quartumse/protocols/state.py
404
405
406
407
408
409
def max_ci_half_width(self) -> float | None:
    """Maximum CI half-width across all observables."""
    half_widths = [
        est.ci.half_width for est in self.observable_estimates.values() if est.ci is not None
    ]
    return max(half_widths) if half_widths else None

max_se()

Maximum standard error across all observables.

Source code in src/quartumse/protocols/state.py
393
394
395
396
397
def max_se(self) -> float:
    """Maximum standard error across all observables."""
    if not self.estimates:
        return float("inf")
    return max(est.se for est in self.estimates)

mean_se()

Mean standard error across all observables.

Source code in src/quartumse/protocols/state.py
399
400
401
402
def mean_se(self) -> float:
    """Mean standard error across all observables."""
    ses = [est.se for est in self.observable_estimates.values()]
    return sum(ses) / len(ses)

to_dict()

Convert to dictionary for serialization.

Source code in src/quartumse/protocols/state.py
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
def to_dict(self) -> dict[str, Any]:
    """Convert to dictionary for serialization."""
    return {
        "observable_estimates": {
            obs_id: {
                "observable_id": est.observable_id,
                "estimate": est.estimate,
                "se": est.se,
                "variance": est.variance,
                "ci": (
                    {
                        "ci_low": est.ci.ci_low,
                        "ci_high": est.ci.ci_high,
                        "ci_low_raw": est.ci.ci_low_raw,
                        "ci_high_raw": est.ci.ci_high_raw,
                        "confidence_level": est.ci.confidence_level,
                        "method": est.ci.method.value,
                        "clamped": est.ci.clamped,
                    }
                    if est.ci
                    else None
                ),
                "effective_sample_size": est.effective_sample_size,
                "diagnostics": est.diagnostics,
            }
            for obs_id, est in self.observable_estimates.items()
        },
        "n_observables": self.n_observables,
        "total_shots": self.total_shots,
        "n_settings": self.n_settings,
        "time_quantum_s": self.time_quantum_s,
        "time_classical_s": self.time_classical_s,
        "protocol_id": self.protocol_id,
        "protocol_version": self.protocol_version,
        "ci_method_id": self.ci_method_id,
        "metadata": self.metadata,
    }

Estimator

Bases: ABC

Abstract base class for quantum observable estimators.

Provides unified interface for different estimation strategies: - Classical shadows (various versions) - Direct measurement - Grouped Pauli measurement

Source code in src/quartumse/estimator/base.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
class Estimator(ABC):
    """
    Abstract base class for quantum observable estimators.

    Provides unified interface for different estimation strategies:
    - Classical shadows (various versions)
    - Direct measurement
    - Grouped Pauli measurement
    """

    def __init__(self, backend: Any, config: Any | None = None) -> None:
        """Store the execution backend and optional estimator configuration."""
        self.backend = backend
        self.config = config

    @abstractmethod
    def estimate(
        self,
        circuit: QuantumCircuit,
        observables: list[Observable],
        target_precision: float | None = None,
    ) -> EstimationResult:
        """
        Estimate expectation values of observables.

        Args:
            circuit: State preparation circuit
            observables: List of observables to estimate
            target_precision: Desired precision (optional)

        Returns:
            Estimation results with confidence intervals
        """
        # Abstract hook: concrete estimators provide the implementation.
        raise NotImplementedError

    @abstractmethod
    def estimate_shots_needed(self, observables: list[Observable], target_precision: float) -> int:
        """
        Estimate number of shots needed for target precision.

        Used for cost estimation and shot allocation.
        """
        # Abstract hook: concrete estimators provide the implementation.
        raise NotImplementedError

estimate(circuit, observables, target_precision=None) abstractmethod

Estimate expectation values of observables.

Parameters:

Name Type Description Default
circuit QuantumCircuit

State preparation circuit

required
observables list[Observable]

List of observables to estimate

required
target_precision float | None

Desired precision (optional)

None

Returns:

Type Description
EstimationResult

Estimation results with confidence intervals

Source code in src/quartumse/estimator/base.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
@abstractmethod
def estimate(
    self,
    circuit: QuantumCircuit,
    observables: list[Observable],
    target_precision: float | None = None,
) -> EstimationResult:
    """
    Estimate expectation values of observables.

    Args:
        circuit: State preparation circuit
        observables: List of observables to estimate
        target_precision: Desired precision (optional)

    Returns:
        Estimation results with confidence intervals
    """
    # Abstract hook: concrete estimators provide the implementation.
    raise NotImplementedError

estimate_shots_needed(observables, target_precision) abstractmethod

Estimate number of shots needed for target precision.

Used for cost estimation and shot allocation.

Source code in src/quartumse/estimator/base.py
68
69
70
71
72
73
74
75
@abstractmethod
def estimate_shots_needed(self, observables: list[Observable], target_precision: float) -> int:
    """
    Estimate number of shots needed for target precision.

    Used for cost estimation and shot allocation.
    """
    # Abstract hook: concrete estimators provide the implementation.
    raise NotImplementedError

FWERMethod

Bases: str, Enum

Method for controlling family-wise error rate.

Source code in src/quartumse/stats/fwer.py
28
29
30
31
32
33
34
class FWERMethod(str, Enum):
    """Method for controlling family-wise error rate.

    Inherits from ``str`` so members compare equal to their string values
    and serialize as plain strings.
    """

    BONFERRONI = "bonferroni"  # per-test level alpha/m
    SIDAK = "sidak"  # per-test level 1 - (1 - alpha)^(1/m)
    HOLM = "holm"  # step-down Bonferroni procedure
    NONE = "none"  # No adjustment

LongFormRow

Bases: BaseModel

A single row in the long-form results table (§10.1).

This schema defines all required columns for the tidy long-form output. Each row corresponds to one observable estimate from one protocol run.

Source code in src/quartumse/io/schemas.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
class LongFormRow(BaseModel):
    """A single row in the long-form results table (§10.1).

    This schema defines all required columns for the tidy long-form output.
    Each row corresponds to one observable estimate from one protocol run.
    """

    # === Identifiers ===
    run_id: str = Field(description="Unique identifier for this benchmark run")
    methodology_version: str = Field(description="Version of the Measurements Bible methodology")
    circuit_id: str = Field(description="Identifier for the circuit instance")
    observable_set_id: str = Field(description="Identifier for the observable set")
    observable_id: str = Field(description="Identifier for this specific observable")
    protocol_id: str = Field(description="Protocol identifier")
    protocol_version: str = Field(description="Protocol version")
    backend_id: str = Field(description="Backend identifier")
    noise_profile_id: str = Field(default="ideal", description="Noise profile identifier")
    replicate_id: int = Field(description="Replicate number (0-indexed)")

    # === Seeds ===
    seed_policy: str = Field(description="Policy used to derive run seeds")
    seed_protocol: int = Field(description="Seed for protocol planning randomness")
    seed_acquire: int = Field(description="Seed for measurement sampling")
    seed_bootstrap: int | None = Field(default=None, description="Seed for bootstrap CI (if used)")

    # === Problem descriptors ===
    n_qubits: int = Field(description="Number of qubits in the circuit")
    circuit_depth: int | None = Field(default=None, description="Circuit depth (if meaningful)")
    twoq_gate_count: int | None = Field(default=None, description="Number of 2-qubit gates")
    observable_type: str = Field(description="Observable type: pauli_string, pauli_sum, matrix")
    locality: int = Field(description="Pauli weight / locality")
    coefficient: float = Field(default=1.0, description="Observable coefficient")
    group_id: str | None = Field(default=None, description="Commuting group ID (if grouped)")
    M_total: int = Field(description="Total number of observables in the set")

    # === Budget and resources ===
    N_total: int = Field(description="Total shots used")
    n_settings: int = Field(description="Number of distinct measurement settings")
    time_quantum_s: float | None = Field(
        default=None, description="Quantum execution time in seconds"
    )
    time_classical_s: float | None = Field(
        default=None, description="Classical processing time in seconds"
    )
    memory_bytes: int | None = Field(default=None, description="Peak memory usage")

    # === Timing breakdown (optional) ===
    time_total_s: float | None = Field(default=None, description="Total wall-clock time in seconds")
    time_pre_compute_s: float | None = Field(
        default=None, description="Pre-compute (initialize + plan) time in seconds"
    )
    time_aer_simulate_s: float | None = Field(
        default=None, description="AER simulation time in seconds"
    )
    time_post_process_s: float | None = Field(
        default=None, description="Post-processing (update + finalize) time in seconds"
    )
    est_quantum_hw_s: float | None = Field(
        default=None, description="Estimated quantum hardware execution time in seconds"
    )

    # === Timeout (optional) ===
    timed_out: bool = Field(default=False, description="Whether the protocol run timed out")
    n_shots_completed: int | None = Field(
        default=None, description="Actual shots completed (when timed out)"
    )

    # === Cost (optional) ===
    cost_model_id: str | None = Field(default=None, description="Cost model identifier")
    cost_usd_estimate: float | None = Field(default=None, description="Estimated cost in USD")

    # === Hardware-specific (optional) ===
    job_status: JobStatus | None = Field(default=None, description="Job execution status")
    queue_time_s: float | None = Field(default=None, description="Queue time in seconds")
    job_submitted_at: datetime | None = Field(default=None, description="Job submission timestamp")
    job_started_at: datetime | None = Field(default=None, description="Job start timestamp")
    job_completed_at: datetime | None = Field(default=None, description="Job completion timestamp")

    # === Estimation results ===
    estimate: float = Field(description="Point estimate of expectation value")
    se: float = Field(description="Standard error of the estimate")
    ci_low_raw: float | None = Field(default=None, description="CI lower bound (before clamping)")
    ci_high_raw: float | None = Field(default=None, description="CI upper bound (before clamping)")
    ci_low: float | None = Field(default=None, description="CI lower bound (after clamping)")
    ci_high: float | None = Field(default=None, description="CI upper bound (after clamping)")
    ci_method_id: str | None = Field(default=None, description="CI construction method")
    confidence_level: float = Field(default=0.95, description="Confidence level for CI")

    # === Truth (if available) ===
    truth_value: float | None = Field(default=None, description="Ground truth expectation value")
    truth_se: float | None = Field(default=None, description="SE of truth (if reference truth)")
    truth_mode: str | None = Field(
        default=None,
        description="Truth mode: exact_statevector, exact_density_matrix, reference",
    )

    # === Derived metrics ===
    abs_err: float | None = Field(default=None, description="Absolute error |estimate - truth|")
    sq_err: float | None = Field(default=None, description="Squared error (estimate - truth)^2")

    # === Additional metadata ===
    metadata: dict[str, Any] = Field(default_factory=dict, description="Additional metadata")

    def compute_derived_metrics(self) -> None:
        """Compute derived metrics from truth if available."""
        # Leaves abs_err/sq_err untouched when no ground truth is available.
        if self.truth_value is not None:
            self.abs_err = abs(self.estimate - self.truth_value)
            self.sq_err = (self.estimate - self.truth_value) ** 2

    # NOTE(review): pydantic v1-style inner Config; pydantic v2 uses
    # model_config instead — confirm the project's pinned pydantic version.
    class Config:
        """Pydantic configuration."""

        use_enum_values = True

Config

Pydantic configuration.

Source code in src/quartumse/io/schemas.py
141
142
143
144
class Config:
    """Pydantic configuration."""

    # Populate/serialize enum fields by their values rather than members
    # (pydantic v1-style config class).
    use_enum_values = True

compute_derived_metrics()

Compute derived metrics from truth if available.

Source code in src/quartumse/io/schemas.py
135
136
137
138
139
def compute_derived_metrics(self) -> None:
    """Compute derived metrics from truth if available."""
    if self.truth_value is not None:
        self.abs_err = abs(self.estimate - self.truth_value)
        self.sq_err = (self.estimate - self.truth_value) ** 2

NoiseProfile dataclass

A noise profile specification.

Attributes:

Name Type Description
profile_id str

Unique identifier for this profile.

noise_type NoiseType

Type of noise model.

parameters dict[str, float]

Noise model parameters.

description str

Human-readable description.

metadata dict[str, Any]

Additional metadata.

Source code in src/quartumse/noise/profiles.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
@dataclass
class NoiseProfile:
    """A noise profile specification.

    Attributes:
        profile_id: Unique identifier for this profile.
        noise_type: Type of noise model.
        parameters: Noise model parameters.
        description: Human-readable description.
        metadata: Additional metadata.
    """

    profile_id: str
    noise_type: NoiseType
    parameters: dict[str, float] = field(default_factory=dict)
    description: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize this profile into a plain dictionary."""
        payload: dict[str, Any] = {"profile_id": self.profile_id}
        payload["noise_type"] = self.noise_type.value
        payload["parameters"] = self.parameters
        payload["description"] = self.description
        payload["metadata"] = self.metadata
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> NoiseProfile:
        """Reconstruct a profile from ``to_dict``-style data."""
        optional = {
            "parameters": data.get("parameters", {}),
            "description": data.get("description", ""),
            "metadata": data.get("metadata", {}),
        }
        return cls(
            profile_id=data["profile_id"],
            noise_type=NoiseType(data["noise_type"]),
            **optional,
        )

from_dict(data) classmethod

Create from dictionary.

Source code in src/quartumse/noise/profiles.py
63
64
65
66
67
68
69
70
71
72
@classmethod
def from_dict(cls, data: dict[str, Any]) -> NoiseProfile:
    """Create from dictionary.

    Inverse of ``to_dict``; missing optional keys fall back to empty values.
    """
    return cls(
        profile_id=data["profile_id"],
        # Look up the enum member by its serialized value.
        noise_type=NoiseType(data["noise_type"]),
        parameters=data.get("parameters", {}),
        description=data.get("description", ""),
        metadata=data.get("metadata", {}),
    )

to_dict()

Convert to dictionary for serialization.

Source code in src/quartumse/noise/profiles.py
53
54
55
56
57
58
59
60
61
def to_dict(self) -> dict[str, Any]:
    """Convert to dictionary for serialization."""
    return {
        "profile_id": self.profile_id,
        "noise_type": self.noise_type.value,
        "parameters": self.parameters,
        "description": self.description,
        "metadata": self.metadata,
    }

ObjectiveAnalysis dataclass

Complete objective-level analysis for a weighted suite.

Source code in src/quartumse/analysis/objective_metrics.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@dataclass
class ObjectiveAnalysis:
    """Complete objective-level analysis for a weighted suite."""

    # Per-protocol estimates at each shot budget (protocol_id -> shots -> estimate)
    estimates_by_protocol: dict[str, dict[int, ObjectiveEstimate]] = field(default_factory=dict)

    # N* for objective (shots needed to reach target error); None when not reached
    n_star_objective: dict[str, int | None] = field(default_factory=dict)

    # Protocol comparison
    winner_at_max_n: str = ""  # presumably a protocol_id — confirm against producer
    objective_ratio: float = 1.0  # shadows_error / baseline_error

    # Metadata
    objective_type: str = ""  # "qaoa_cost", "energy", etc.
    target_epsilon: float = 0.01  # target error threshold for the objective

ObjectiveEstimate dataclass

Result of estimating a weighted objective.

Source code in src/quartumse/analysis/objective_metrics.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
@dataclass
class ObjectiveEstimate:
    """Result of estimating a weighted objective.

    Fields with ``None`` defaults are optional and filled only when the
    corresponding analysis (bootstrap, truth comparison) was performed.
    """

    # Point estimate
    estimate: float

    # Uncertainty quantification
    se: float | None = None  # Standard error (from bootstrap or analytic)
    ci_low: float | None = None  # 95% CI lower bound
    ci_high: float | None = None  # 95% CI upper bound

    # Comparison to truth
    true_value: float | None = None
    abs_error: float | None = None
    rel_error: float | None = None

    # Metadata
    n_observables: int = 0
    n_shots: int = 0
    protocol_id: str = ""

ObjectiveType

Bases: Enum

Type of objective function for the suite.

Source code in src/quartumse/observables/suites.py
30
31
32
33
34
35
36
class ObjectiveType(Enum):
    """Type of objective function for the suite.

    Determines how a suite of observables is scored as a single objective.
    """

    PER_OBSERVABLE = "per_observable"  # Evaluate each observable independently
    WEIGHTED_SUM = "weighted_sum"  # Evaluate sum: E = Σ w_k ⟨O_k⟩
    MAX_ERROR = "max_error"  # Evaluate worst-case observable
    CUSTOM = "custom"  # User-defined objective

Observable dataclass

A quantum observable with full metadata (§3.2).

An observable represents a Hermitian operator whose expectation value we want to estimate. The primary representation is a Pauli string (e.g., "XYZII") with an optional coefficient.

Attributes:

Name Type Description
pauli_string str

Pauli string representation (e.g., "XYZII").

coefficient float

Multiplicative coefficient (default 1.0).

observable_id str | None

Unique identifier. Auto-generated if not provided.

group_id str | None

Group identifier for commuting families (None if ungrouped).

metadata dict[str, Any]

Additional observable-specific metadata.

Source code in src/quartumse/observables/core.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
@dataclass
class Observable:
    """A quantum observable with full metadata (§3.2).

    An observable represents a Hermitian operator whose expectation value
    we want to estimate. The primary representation is a Pauli string
    (e.g., "XYZII") with an optional coefficient.

    Derived quantities (support, locality, basis indices, dense/sparse
    matrices) are cached in private fields for performance; the caches are
    primed in ``__post_init__`` and filled lazily by the corresponding
    properties otherwise.

    Attributes:
        pauli_string: Pauli string representation (e.g., "XYZII").
        coefficient: Multiplicative coefficient (default 1.0).
        observable_id: Unique identifier. Auto-generated if not provided.
        group_id: Group identifier for commuting families (None if ungrouped).
        metadata: Additional observable-specific metadata.
    """

    pauli_string: str
    coefficient: float = 1.0
    observable_id: str | None = None
    group_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    # Cached computed properties for performance
    # (repr=False/compare=False keep these caches out of the dataclass repr
    # and out of any generated comparison machinery).
    _cached_locality: int | None = field(default=None, repr=False, compare=False)
    _cached_support: list[int] | None = field(default=None, repr=False, compare=False)
    _cached_basis_indices: NDArray[np.int_] | None = field(
        default=None, repr=False, compare=False
    )
    _cached_sparse_matrix: Any | None = field(default=None, repr=False, compare=False)
    _cached_dense_matrix: NDArray[np.complexfloating] | None = field(
        default=None, repr=False, compare=False
    )

    def __post_init__(self) -> None:
        """Validate and set defaults."""
        # Validate Pauli string
        valid_chars = set("IXYZ")
        if not all(c in valid_chars for c in self.pauli_string):
            invalid = set(self.pauli_string) - valid_chars
            raise ValueError(
                f"Invalid characters in Pauli string: {invalid}. " f"Must be one of I, X, Y, Z."
            )

        # Auto-generate observable_id if not provided
        if self.observable_id is None:
            # Create a short hash-based ID
            # (deterministic: the same string/coefficient pair always maps
            # to the same ID).
            hash_input = f"{self.pauli_string}:{self.coefficient}"
            short_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:8]
            self.observable_id = f"obs_{short_hash}"

        # Pre-compute locality and support for performance (avoid repeated iteration)
        self._cached_support = [i for i, c in enumerate(self.pauli_string) if c != "I"]
        self._cached_locality = len(self._cached_support)
        # Identity-only strings leave _cached_basis_indices as None here;
        # the basis_indices property builds an empty array lazily in that case.
        if self._cached_support:
            pauli_to_basis = {"X": 1, "Y": 2, "Z": 0}
            self._cached_basis_indices = np.array(
                [pauli_to_basis[self.pauli_string[q]] for q in self._cached_support],
                dtype=int,
            )

    @property
    def n_qubits(self) -> int:
        """Number of qubits this observable acts on."""
        return len(self.pauli_string)

    @property
    def observable_type(self) -> ObservableType:
        """Type of observable representation."""
        return ObservableType.PAULI_STRING

    @property
    def locality(self) -> int:
        """Pauli weight (number of non-identity factors). Cached for performance."""
        if self._cached_locality is None:
            self._cached_locality = sum(1 for c in self.pauli_string if c != "I")
        return self._cached_locality

    @property
    def weight(self) -> int:
        """Alias for locality (Pauli weight)."""
        return self.locality

    @property
    def support(self) -> list[int]:
        """Qubit indices where this observable acts non-trivially. Cached for performance."""
        if self._cached_support is None:
            self._cached_support = [i for i, c in enumerate(self.pauli_string) if c != "I"]
        return self._cached_support

    @property
    def basis_indices(self) -> NDArray[np.int_]:
        """Measurement basis indices for non-identity terms (X->1, Y->2, Z->0)."""
        if self._cached_basis_indices is None:
            pauli_to_basis = {"X": 1, "Y": 2, "Z": 0}
            support = self.support
            self._cached_basis_indices = np.array(
                [pauli_to_basis[self.pauli_string[q]] for q in support], dtype=int
            )
        return self._cached_basis_indices

    def to_matrix(self) -> NDArray[np.complexfloating]:
        """Convert to dense matrix representation.

        NOTE: the returned array is the cached object itself; callers must
        not mutate it, or subsequent calls will see the corrupted cache.
        """
        if self._cached_dense_matrix is None:
            # Build the 2^n x 2^n matrix as a Kronecker product of
            # single-qubit Pauli matrices, left-to-right over the string.
            result = np.array([[1.0]], dtype=complex)
            for pauli_char in self.pauli_string:
                result = np.kron(result, PAULI_MATRICES[pauli_char])
            self._cached_dense_matrix = self.coefficient * result
        return self._cached_dense_matrix

    def to_sparse_matrix(self) -> Any:
        """Convert to sparse matrix representation (opt-in for large systems)."""
        if self._cached_sparse_matrix is None:
            # Imported lazily so scipy is only required when sparse matrices
            # are actually requested.
            from scipy import sparse

            sparse_paulis = {k: sparse.csr_matrix(v) for k, v in PAULI_MATRICES.items()}
            result = sparse.csr_matrix([[1.0]], dtype=complex)
            for pauli_char in self.pauli_string:
                result = sparse.kron(result, sparse_paulis[pauli_char], format="csr")
            self._cached_sparse_matrix = self.coefficient * result
        return self._cached_sparse_matrix

    def commutes_with(self, other: Observable) -> bool:
        """Check if this observable commutes with another.

        Two Pauli strings commute if they differ on an even number of
        qubits (excluding positions where either is identity).

        Raises:
            ValueError: If the two observables act on different qubit counts.
        """
        if self.n_qubits != other.n_qubits:
            raise ValueError(
                f"Cannot compare observables with different qubit counts: "
                f"{self.n_qubits} vs {other.n_qubits}"
            )

        # Count positions where both factors are non-identity and differ;
        # the strings commute iff this count is even.
        anticommute_count = 0
        for p1, p2 in zip(self.pauli_string, other.pauli_string, strict=False):
            if p1 != "I" and p2 != "I" and p1 != p2:
                anticommute_count += 1

        return anticommute_count % 2 == 0

    def shared_basis(self, other: Observable) -> str | None:
        """Get shared measurement basis if observables commute qubit-wise.

        Returns None if no shared basis exists (observables don't commute
        qubit-wise, though they may still commute globally).
        """
        if self.n_qubits != other.n_qubits:
            return None

        basis = []
        for p1, p2 in zip(self.pauli_string, other.pauli_string, strict=False):
            if p1 == "I":
                basis.append(p2 if p2 != "I" else "Z")  # Default to Z
            elif p2 == "I":
                basis.append(p1)
            elif p1 == p2:
                basis.append(p1)
            else:
                return None  # Conflict on this qubit

        return "".join(basis)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "observable_id": self.observable_id,
            "pauli_string": self.pauli_string,
            "coefficient": self.coefficient,
            "observable_type": self.observable_type.value,
            "locality": self.locality,
            "n_qubits": self.n_qubits,
            "group_id": self.group_id,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Observable:
        """Create from dictionary."""
        return cls(
            pauli_string=data["pauli_string"],
            coefficient=data.get("coefficient", 1.0),
            observable_id=data.get("observable_id"),
            group_id=data.get("group_id"),
            metadata=data.get("metadata", {}),
        )

    def __str__(self) -> str:
        """String representation."""
        if self.coefficient == 1.0:
            return self.pauli_string
        return f"{self.coefficient}*{self.pauli_string}"

    def __repr__(self) -> str:
        """Detailed representation."""
        return (
            f"Observable('{self.pauli_string}', coef={self.coefficient}, id={self.observable_id})"
        )

    def __hash__(self) -> int:
        """Hash based on Pauli string and coefficient."""
        # NOTE(review): hashes the exact coefficient while __eq__ compares
        # with np.isclose — two observables that compare equal can hash
        # differently, which breaks set/dict lookups for near-equal
        # coefficients. Confirm this tolerance mismatch is intentional.
        return hash((self.pauli_string, self.coefficient))

    def __eq__(self, other: object) -> bool:
        """Equality based on Pauli string and coefficient."""
        if not isinstance(other, Observable):
            return False
        return self.pauli_string == other.pauli_string and np.isclose(
            self.coefficient, other.coefficient
        )

basis_indices property

Measurement basis indices for non-identity terms (X->1, Y->2, Z->0).

locality property

Pauli weight (number of non-identity factors). Cached for performance.

n_qubits property

Number of qubits this observable acts on.

observable_type property

Type of observable representation.

support property

Qubit indices where this observable acts non-trivially. Cached for performance.

weight property

Alias for locality (Pauli weight).

__eq__(other)

Equality based on Pauli string and coefficient.

Source code in src/quartumse/observables/core.py
250
251
252
253
254
255
256
def __eq__(self, other: object) -> bool:
    """Equality based on Pauli string and coefficient."""
    if not isinstance(other, Observable):
        return False
    return self.pauli_string == other.pauli_string and np.isclose(
        self.coefficient, other.coefficient
    )

__hash__()

Hash based on Pauli string and coefficient.

Source code in src/quartumse/observables/core.py
246
247
248
def __hash__(self) -> int:
    """Hash based on Pauli string and coefficient."""
    return hash((self.pauli_string, self.coefficient))

__post_init__()

Validate and set defaults.

Source code in src/quartumse/observables/core.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def __post_init__(self) -> None:
    """Validate and set defaults."""
    # Validate Pauli string
    valid_chars = set("IXYZ")
    if not all(c in valid_chars for c in self.pauli_string):
        invalid = set(self.pauli_string) - valid_chars
        raise ValueError(
            f"Invalid characters in Pauli string: {invalid}. " f"Must be one of I, X, Y, Z."
        )

    # Auto-generate observable_id if not provided
    if self.observable_id is None:
        # Create a short hash-based ID
        hash_input = f"{self.pauli_string}:{self.coefficient}"
        short_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:8]
        self.observable_id = f"obs_{short_hash}"

    # Pre-compute locality and support for performance (avoid repeated iteration)
    self._cached_support = [i for i, c in enumerate(self.pauli_string) if c != "I"]
    self._cached_locality = len(self._cached_support)
    if self._cached_support:
        pauli_to_basis = {"X": 1, "Y": 2, "Z": 0}
        self._cached_basis_indices = np.array(
            [pauli_to_basis[self.pauli_string[q]] for q in self._cached_support],
            dtype=int,
        )

__repr__()

Detailed representation.

Source code in src/quartumse/observables/core.py
240
241
242
243
244
def __repr__(self) -> str:
    """Detailed representation."""
    return (
        f"Observable('{self.pauli_string}', coef={self.coefficient}, id={self.observable_id})"
    )

__str__()

String representation.

Source code in src/quartumse/observables/core.py
234
235
236
237
238
def __str__(self) -> str:
    """String representation."""
    if self.coefficient == 1.0:
        return self.pauli_string
    return f"{self.coefficient}*{self.pauli_string}"

commutes_with(other)

Check if this observable commutes with another.

Two Pauli strings commute if they differ on an even number of qubits (excluding positions where either is identity).

Source code in src/quartumse/observables/core.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def commutes_with(self, other: Observable) -> bool:
    """Check if this observable commutes with another.

    Two Pauli strings commute if they differ on an even number of
    qubits (excluding positions where either is identity).
    """
    if self.n_qubits != other.n_qubits:
        raise ValueError(
            f"Cannot compare observables with different qubit counts: "
            f"{self.n_qubits} vs {other.n_qubits}"
        )

    anticommute_count = 0
    for p1, p2 in zip(self.pauli_string, other.pauli_string, strict=False):
        if p1 != "I" and p2 != "I" and p1 != p2:
            anticommute_count += 1

    return anticommute_count % 2 == 0

from_dict(data) classmethod

Create from dictionary.

Source code in src/quartumse/observables/core.py
223
224
225
226
227
228
229
230
231
232
@classmethod
def from_dict(cls, data: dict[str, Any]) -> Observable:
    """Create from dictionary."""
    return cls(
        pauli_string=data["pauli_string"],
        coefficient=data.get("coefficient", 1.0),
        observable_id=data.get("observable_id"),
        group_id=data.get("group_id"),
        metadata=data.get("metadata", {}),
    )

shared_basis(other)

Get shared measurement basis if observables commute qubit-wise.

Returns None if no shared basis exists (observables don't commute qubit-wise, though they may still commute globally).

Source code in src/quartumse/observables/core.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
def shared_basis(self, other: Observable) -> str | None:
    """Get shared measurement basis if observables commute qubit-wise.

    Returns None if no shared basis exists (observables don't commute
    qubit-wise, though they may still commute globally).
    """
    if self.n_qubits != other.n_qubits:
        return None

    basis = []
    for p1, p2 in zip(self.pauli_string, other.pauli_string, strict=False):
        if p1 == "I":
            basis.append(p2 if p2 != "I" else "Z")  # Default to Z
        elif p2 == "I":
            basis.append(p1)
        elif p1 == p2:
            basis.append(p1)
        else:
            return None  # Conflict on this qubit

    return "".join(basis)

to_dict()

Convert to dictionary for serialization.

Source code in src/quartumse/observables/core.py
210
211
212
213
214
215
216
217
218
219
220
221
def to_dict(self) -> dict[str, Any]:
    """Convert to dictionary for serialization."""
    return {
        "observable_id": self.observable_id,
        "pauli_string": self.pauli_string,
        "coefficient": self.coefficient,
        "observable_type": self.observable_type.value,
        "locality": self.locality,
        "n_qubits": self.n_qubits,
        "group_id": self.group_id,
        "metadata": self.metadata,
    }

to_matrix()

Convert to dense matrix representation.

Source code in src/quartumse/observables/core.py
148
149
150
151
152
153
154
155
def to_matrix(self) -> NDArray[np.complexfloating]:
    """Convert to dense matrix representation."""
    if self._cached_dense_matrix is None:
        result = np.array([[1.0]], dtype=complex)
        for pauli_char in self.pauli_string:
            result = np.kron(result, PAULI_MATRICES[pauli_char])
        self._cached_dense_matrix = self.coefficient * result
    return self._cached_dense_matrix

to_sparse_matrix()

Convert to sparse matrix representation (opt-in for large systems).

Source code in src/quartumse/observables/core.py
157
158
159
160
161
162
163
164
165
166
167
def to_sparse_matrix(self) -> Any:
    """Convert to sparse matrix representation (opt-in for large systems)."""
    if self._cached_sparse_matrix is None:
        from scipy import sparse

        sparse_paulis = {k: sparse.csr_matrix(v) for k, v in PAULI_MATRICES.items()}
        result = sparse.csr_matrix([[1.0]], dtype=complex)
        for pauli_char in self.pauli_string:
            result = sparse.kron(result, sparse_paulis[pauli_char], format="csr")
        self._cached_sparse_matrix = self.coefficient * result
    return self._cached_sparse_matrix

ObservableSet dataclass

A set of observables with generation metadata (§3.3).

This class represents a collection of observables to be estimated, along with metadata about how they were generated for reproducibility.

Attributes:

Name Type Description
observables list[Observable]

List of Observable objects.

observable_set_id str | None

Unique identifier for this set.

generator_id str | None

ID of the generator that created this set.

generator_version str | None

Version of the generator.

generator_seed int | None

Random seed used for generation.

generator_params dict[str, Any]

Parameters passed to the generator.

n_qubits int

Number of qubits (all observables must match).

metadata dict[str, Any]

Additional set-level metadata.

Source code in src/quartumse/observables/core.py
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
@dataclass
class ObservableSet:
    """A set of observables with generation metadata (§3.3).

    This class represents a collection of observables to be estimated,
    along with metadata about how they were generated for reproducibility.

    Attributes:
        observables: List of Observable objects.
        observable_set_id: Unique identifier for this set.
        generator_id: ID of the generator that created this set.
        generator_version: Version of the generator.
        generator_seed: Random seed used for generation.
        generator_params: Parameters passed to the generator.
        n_qubits: Number of qubits (all observables must match).
        metadata: Additional set-level metadata.
    """

    observables: list[Observable]
    observable_set_id: str | None = None
    generator_id: str | None = None
    generator_version: str | None = None
    generator_seed: int | None = None
    generator_params: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
    # Index for O(1) lookup by observable_id
    _id_index: dict[str, Observable] = field(default_factory=dict, repr=False, compare=False)

    def __post_init__(self) -> None:
        """Validate and set defaults."""
        if not self.observables:
            raise ValueError("ObservableSet must contain at least one observable")

        # Every observable must agree on the system size.
        distinct_sizes = {obs.n_qubits for obs in self.observables}
        if len(distinct_sizes) > 1:
            raise ValueError(f"All observables must have same qubit count, got: {distinct_sizes}")

        # Auto-generate set ID if not provided
        if self.observable_set_id is None:
            self.observable_set_id = f"obsset_{uuid.uuid4().hex[:8]}"

        # Build index for O(1) lookup by observable_id
        self._id_index = {o.observable_id: o for o in self.observables if o.observable_id}

    @property
    def n_qubits(self) -> int:
        """Number of qubits for observables in this set."""
        first = self.observables[0]
        return first.n_qubits

    @property
    def n_observables(self) -> int:
        """Number of observables in this set."""
        return len(self.observables)

    @property
    def M(self) -> int:
        """Alias for n_observables (common notation)."""
        return self.n_observables

    def get_by_id(self, observable_id: str) -> Observable:
        """Get an observable by its ID. O(1) lookup using index."""
        found = self._id_index.get(observable_id)
        if found is None:
            raise KeyError(f"Observable with ID '{observable_id}' not found")
        return found

    def locality_distribution(self) -> dict[int, int]:
        """Get distribution of Pauli weights."""
        counts: dict[int, int] = {}
        for obs in self.observables:
            weight = obs.locality
            counts[weight] = counts.get(weight, 0) + 1
        # Return weights in ascending order for stable serialization.
        return {w: counts[w] for w in sorted(counts)}

    def max_locality(self) -> int:
        """Maximum Pauli weight in the set."""
        return max(self.observables, key=lambda obs: obs.locality).locality

    def mean_locality(self) -> float:
        """Mean Pauli weight in the set."""
        total = sum(obs.locality for obs in self.observables)
        return total / len(self.observables)

    def __iter__(self) -> Iterator[Observable]:
        """Iterate over observables."""
        yield from self.observables

    def __len__(self) -> int:
        """Number of observables."""
        return len(self.observables)

    def __getitem__(self, index: int) -> Observable:
        """Get observable by index."""
        return self.observables[index]

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization."""
        payload: dict[str, Any] = {
            "observable_set_id": self.observable_set_id,
            "n_observables": self.n_observables,
            "n_qubits": self.n_qubits,
            "generator_id": self.generator_id,
            "generator_version": self.generator_version,
            "generator_seed": self.generator_seed,
            "generator_params": self.generator_params,
            "observables": [obs.to_dict() for obs in self.observables],
            "locality_distribution": self.locality_distribution(),
            "metadata": self.metadata,
        }
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ObservableSet:
        """Create from dictionary."""
        restored = [Observable.from_dict(entry) for entry in data["observables"]]
        return cls(
            observables=restored,
            observable_set_id=data.get("observable_set_id"),
            generator_id=data.get("generator_id"),
            generator_version=data.get("generator_version"),
            generator_seed=data.get("generator_seed"),
            generator_params=data.get("generator_params", {}),
            metadata=data.get("metadata", {}),
        )

    @classmethod
    def from_pauli_strings(
        cls,
        pauli_strings: list[str],
        coefficients: list[float] | None = None,
        **kwargs: Any,
    ) -> ObservableSet:
        """Create from a list of Pauli strings.

        Args:
            pauli_strings: List of Pauli string representations.
            coefficients: Optional list of coefficients (default all 1.0).
            **kwargs: Additional arguments passed to ObservableSet.

        Returns:
            ObservableSet containing the specified observables.
        """
        if coefficients is None:
            coefficients = [1.0 for _ in pauli_strings]
        elif len(coefficients) != len(pauli_strings):
            raise ValueError(
                f"Number of coefficients ({len(coefficients)}) must match "
                f"number of Pauli strings ({len(pauli_strings)})"
            )

        built = [
            Observable(pauli_string=ps, coefficient=coef)
            for ps, coef in zip(pauli_strings, coefficients, strict=False)
        ]
        return cls(observables=built, **kwargs)

M property

Alias for n_observables (common notation).

n_observables property

Number of observables in this set.

n_qubits property

Number of qubits for observables in this set.

__getitem__(index)

Get observable by index.

Source code in src/quartumse/observables/core.py
348
349
350
def __getitem__(self, index: int) -> Observable:
    """Get observable by index."""
    return self.observables[index]

__iter__()

Iterate over observables.

Source code in src/quartumse/observables/core.py
340
341
342
def __iter__(self) -> Iterator[Observable]:
    """Iterate over observables."""
    return iter(self.observables)

__len__()

Number of observables.

Source code in src/quartumse/observables/core.py
344
345
346
def __len__(self) -> int:
    """Number of observables."""
    return len(self.observables)

__post_init__()

Validate and set defaults.

Source code in src/quartumse/observables/core.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
def __post_init__(self) -> None:
    """Validate and set defaults."""
    if not self.observables:
        raise ValueError("ObservableSet must contain at least one observable")

    # Verify all observables have same qubit count
    n_qubits_set = {obs.n_qubits for obs in self.observables}
    if len(n_qubits_set) > 1:
        raise ValueError(f"All observables must have same qubit count, got: {n_qubits_set}")

    # Auto-generate set ID if not provided
    if self.observable_set_id is None:
        self.observable_set_id = f"obsset_{uuid.uuid4().hex[:8]}"

    # Build index for O(1) lookup by observable_id
    self._id_index = {obs.observable_id: obs for obs in self.observables if obs.observable_id}

from_dict(data) classmethod

Create from dictionary.

Source code in src/quartumse/observables/core.py
367
368
369
370
371
372
373
374
375
376
377
378
379
@classmethod
def from_dict(cls, data: dict[str, Any]) -> ObservableSet:
    """Create from dictionary."""
    observables = [Observable.from_dict(obs_data) for obs_data in data["observables"]]
    return cls(
        observables=observables,
        observable_set_id=data.get("observable_set_id"),
        generator_id=data.get("generator_id"),
        generator_version=data.get("generator_version"),
        generator_seed=data.get("generator_seed"),
        generator_params=data.get("generator_params", {}),
        metadata=data.get("metadata", {}),
    )

from_pauli_strings(pauli_strings, coefficients=None, **kwargs) classmethod

Create from a list of Pauli strings.

Parameters:

Name Type Description Default
pauli_strings list[str]

List of Pauli string representations.

required
coefficients list[float] | None

Optional list of coefficients (default all 1.0).

None
**kwargs Any

Additional arguments passed to ObservableSet.

{}

Returns:

Type Description
ObservableSet

ObservableSet containing the specified observables.

Source code in src/quartumse/observables/core.py
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
@classmethod
def from_pauli_strings(
    cls,
    pauli_strings: list[str],
    coefficients: list[float] | None = None,
    **kwargs: Any,
) -> ObservableSet:
    """Create from a list of Pauli strings.

    Args:
        pauli_strings: List of Pauli string representations.
        coefficients: Optional list of coefficients (default all 1.0).
        **kwargs: Additional arguments passed to ObservableSet.

    Returns:
        ObservableSet containing the specified observables.
    """
    if coefficients is None:
        coefficients = [1.0] * len(pauli_strings)
    elif len(coefficients) != len(pauli_strings):
        raise ValueError(
            f"Number of coefficients ({len(coefficients)}) must match "
            f"number of Pauli strings ({len(pauli_strings)})"
        )

    observables = [
        Observable(pauli_string=ps, coefficient=coef)
        for ps, coef in zip(pauli_strings, coefficients, strict=False)
    ]
    return cls(observables=observables, **kwargs)

get_by_id(observable_id)

Get an observable by its ID. O(1) lookup using index.

Source code in src/quartumse/observables/core.py
319
320
321
322
323
def get_by_id(self, observable_id: str) -> Observable:
    """Get an observable by its ID. O(1) lookup using index."""
    if observable_id in self._id_index:
        return self._id_index[observable_id]
    raise KeyError(f"Observable with ID '{observable_id}' not found")

locality_distribution()

Get distribution of Pauli weights.

Source code in src/quartumse/observables/core.py
325
326
327
328
329
330
def locality_distribution(self) -> dict[int, int]:
    """Get distribution of Pauli weights."""
    dist: dict[int, int] = {}
    for obs in self.observables:
        dist[obs.locality] = dist.get(obs.locality, 0) + 1
    return dict(sorted(dist.items()))

max_locality()

Maximum Pauli weight in the set.

Source code in src/quartumse/observables/core.py
332
333
334
def max_locality(self) -> int:
    """Maximum Pauli weight in the set."""
    return max(obs.locality for obs in self.observables)

mean_locality()

Mean Pauli weight in the set.

Source code in src/quartumse/observables/core.py
336
337
338
def mean_locality(self) -> float:
    """Mean Pauli weight in the set."""
    return sum(obs.locality for obs in self.observables) / len(self.observables)

to_dict()

Convert to dictionary for serialization.

Source code in src/quartumse/observables/core.py
352
353
354
355
356
357
358
359
360
361
362
363
364
365
def to_dict(self) -> dict[str, Any]:
    """Convert to dictionary for serialization."""
    return {
        "observable_set_id": self.observable_set_id,
        "n_observables": self.n_observables,
        "n_qubits": self.n_qubits,
        "generator_id": self.generator_id,
        "generator_version": self.generator_version,
        "generator_seed": self.generator_seed,
        "generator_params": self.generator_params,
        "observables": [obs.to_dict() for obs in self.observables],
        "locality_distribution": self.locality_distribution(),
        "metadata": self.metadata,
    }

ObservableSuite dataclass

A named collection of observables for benchmarking.

This class extends ObservableSet with:

- Suite name and type for clear scenario identification
- Optional weights for weighted-sum objectives (energy, cost)
- Objective type specification
- Commutation analysis metadata

Attributes:

Name Type Description
name str

Human-readable suite name (e.g., "workload_qaoa_cost")

suite_type SuiteType

Category (workload, stress, posthoc, commuting)

observable_set ObservableSet

The underlying ObservableSet

weights dict[str, float] | None

Optional dict mapping observable_id -> coefficient

objective ObjectiveType

Type of objective function

description str

Human-readable description

metadata dict[str, Any]

Additional suite-specific metadata

Source code in src/quartumse/observables/suites.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
@dataclass
class ObservableSuite:
    """A named collection of observables for benchmarking.

    This class extends ObservableSet with:
    - Suite name and type for clear scenario identification
    - Optional weights for weighted-sum objectives (energy, cost)
    - Objective type specification
    - Commutation analysis metadata

    Attributes:
        name: Human-readable suite name (e.g., "workload_qaoa_cost")
        suite_type: Category (workload, stress, posthoc, commuting)
        observable_set: The underlying ObservableSet
        weights: Optional dict mapping observable_id -> coefficient
        objective: Type of objective function
        description: Human-readable description
        metadata: Additional suite-specific metadata
    """

    name: str
    suite_type: SuiteType
    observable_set: ObservableSet
    weights: dict[str, float] | None = None
    objective: ObjectiveType = ObjectiveType.PER_OBSERVABLE
    description: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate suite configuration.

        Raises:
            ValueError: If any weight key does not match an observable ID.
        """
        # Supplying weights with the default objective implies the caller
        # wants a weighted sum; promote the objective automatically.
        if self.weights and self.objective == ObjectiveType.PER_OBSERVABLE:
            self.objective = ObjectiveType.WEIGHTED_SUM

        # Every weight key must refer to an observable actually in the set.
        if self.weights:
            obs_ids = {obs.observable_id for obs in self.observable_set}
            weight_ids = set(self.weights.keys())
            if not weight_ids.issubset(obs_ids):
                missing = weight_ids - obs_ids
                raise ValueError(f"Weight keys not found in observables: {missing}")

    @property
    def n_observables(self) -> int:
        """Number of observables in the suite."""
        return len(self.observable_set)

    @property
    def n_qubits(self) -> int:
        """Number of qubits."""
        return self.observable_set.n_qubits

    @property
    def observables(self) -> list[Observable]:
        """List of observables."""
        return self.observable_set.observables

    def locality_distribution(self) -> dict[int, int]:
        """Distribution of Pauli weights (delegates to the underlying set)."""
        return self.observable_set.locality_distribution()

    def commutation_analysis(self) -> dict[str, Any]:
        """Analyze commutation structure of the suite.

        Returns dict with:
            - n_commuting_groups: Number of groups after greedy partitioning
            - max_group_size: Largest commuting group
            - fully_commuting: Whether all observables commute
            - grouping_efficiency: n_observables / n_groups (higher = better for direct)
        """
        # Local import; likely avoids a circular dependency with the
        # grouping module -- confirm before hoisting to module level.
        from .grouping import partition_observable_set

        groups, stats = partition_observable_set(self.observable_set)

        return {
            "n_commuting_groups": len(groups),
            "max_group_size": max(len(g.observables) for g in groups) if groups else 0,
            "fully_commuting": len(groups) == 1,
            "grouping_efficiency": self.n_observables / len(groups) if groups else 0,
            "partition_stats": stats,
        }

    def compute_objective(self, estimates: dict[str, float]) -> float:
        """Compute the objective value from observable estimates.

        Args:
            estimates: Dict mapping observable_id -> estimated expectation value

        Returns:
            Objective value (interpretation depends on objective type)

        Raises:
            ValueError: If WEIGHTED_SUM is requested without weights, or the
                objective type is not recognized.
        """
        if self.objective == ObjectiveType.PER_OBSERVABLE:
            # Return mean estimate (not very meaningful, but consistent).
            # Cast to a plain float: np.mean returns np.float64, which would
            # otherwise leak through the annotated `float` return type.
            return float(np.mean(list(estimates.values())))

        elif self.objective == ObjectiveType.WEIGHTED_SUM:
            if not self.weights:
                raise ValueError("WEIGHTED_SUM objective requires weights")
            total = 0.0
            # Observables absent from `estimates` simply contribute nothing.
            for obs_id, weight in self.weights.items():
                if obs_id in estimates:
                    total += weight * estimates[obs_id]
            return total

        elif self.objective == ObjectiveType.MAX_ERROR:
            # Requires ground-truth values, which are unavailable here.
            return float("nan")

        else:
            raise ValueError(f"Unknown objective type: {self.objective}")

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "name": self.name,
            "suite_type": self.suite_type.value,
            "objective": self.objective.value,
            "description": self.description,
            "n_observables": self.n_observables,
            "n_qubits": self.n_qubits,
            "weights": self.weights,
            "observable_set": self.observable_set.to_dict(),
            "locality_distribution": self.locality_distribution(),
            "metadata": self.metadata,
        }

    @classmethod
    def from_pauli_strings(
        cls,
        name: str,
        suite_type: SuiteType,
        pauli_strings: list[str],
        weights: dict[str, float] | None = None,
        **kwargs: Any,
    ) -> ObservableSuite:
        """Create suite from Pauli strings.

        Args:
            name: Suite name
            suite_type: Suite category
            pauli_strings: List of Pauli string representations
            weights: Optional dict mapping Pauli string -> coefficient
            **kwargs: Additional arguments (objective, description, metadata)
        """
        obs_set = ObservableSet.from_pauli_strings(pauli_strings)

        # Re-key weights from Pauli strings to the generated observable IDs.
        if weights:
            id_weights = {}
            for obs in obs_set.observables:
                if obs.pauli_string in weights:
                    id_weights[obs.observable_id] = weights[obs.pauli_string]
            weights = id_weights

        return cls(
            name=name,
            suite_type=suite_type,
            observable_set=obs_set,
            weights=weights,
            **kwargs,
        )

n_observables property

Number of observables in the suite.

n_qubits property

Number of qubits.

observables property

List of observables.

__post_init__()

Validate suite configuration.

Source code in src/quartumse/observables/suites.py
77
78
79
80
81
82
83
84
85
86
87
88
89
def __post_init__(self) -> None:
    """Validate suite configuration after dataclass initialization."""
    # Weights together with the default per-observable objective imply the
    # caller wants a weighted sum; promote the objective accordingly.
    if self.weights and self.objective == ObjectiveType.PER_OBSERVABLE:
        self.objective = ObjectiveType.WEIGHTED_SUM

    if not self.weights:
        return

    # Reject any weight key that does not correspond to an observable.
    known_ids = {obs.observable_id for obs in self.observable_set}
    missing = set(self.weights.keys()) - known_ids
    if missing:
        raise ValueError(f"Weight keys not found in observables: {missing}")

commutation_analysis()

Analyze commutation structure of the suite.

Returns dict with
  • n_commuting_groups: Number of groups after greedy partitioning
  • max_group_size: Largest commuting group
  • fully_commuting: Whether all observables commute
  • grouping_efficiency: n_observables / n_groups (higher = better for direct)
Source code in src/quartumse/observables/suites.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def commutation_analysis(self) -> dict[str, Any]:
    """Analyze commutation structure of the suite.

    Returns dict with:
        - n_commuting_groups: Number of groups after greedy partitioning
        - max_group_size: Largest commuting group
        - fully_commuting: Whether all observables commute
        - grouping_efficiency: n_observables / n_groups (higher = better for direct)
    """
    from .grouping import partition_observable_set

    groups, stats = partition_observable_set(self.observable_set)

    n_groups = len(groups)
    largest = max((len(g.observables) for g in groups), default=0)
    efficiency = self.n_observables / n_groups if n_groups else 0
    return {
        "n_commuting_groups": n_groups,
        "max_group_size": largest,
        "fully_commuting": n_groups == 1,
        "grouping_efficiency": efficiency,
        "partition_stats": stats,
    }

compute_objective(estimates)

Compute the objective value from observable estimates.

Parameters:

Name Type Description Default
estimates dict[str, float]

Dict mapping observable_id -> estimated expectation value

required

Returns:

Type Description
float

Objective value (interpretation depends on objective type)

Source code in src/quartumse/observables/suites.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def compute_objective(self, estimates: dict[str, float]) -> float:
    """Compute the objective value from observable estimates.

    Args:
        estimates: Dict mapping observable_id -> estimated expectation value

    Returns:
        Objective value (interpretation depends on objective type)
    """
    if self.objective == ObjectiveType.PER_OBSERVABLE:
        # Return mean estimate (not very meaningful, but consistent)
        return np.mean(list(estimates.values()))

    elif self.objective == ObjectiveType.WEIGHTED_SUM:
        if not self.weights:
            raise ValueError("WEIGHTED_SUM objective requires weights")
        total = 0.0
        for obs_id, weight in self.weights.items():
            if obs_id in estimates:
                total += weight * estimates[obs_id]
        return total

    elif self.objective == ObjectiveType.MAX_ERROR:
        # This requires truth values, return NaN for now
        return float("nan")

    else:
        raise ValueError(f"Unknown objective type: {self.objective}")

from_pauli_strings(name, suite_type, pauli_strings, weights=None, **kwargs) classmethod

Create suite from Pauli strings.

Parameters:

Name Type Description Default
name str

Suite name

required
suite_type SuiteType

Suite category

required
pauli_strings list[str]

List of Pauli string representations

required
weights dict[str, float] | None

Optional dict mapping Pauli string -> coefficient

None
**kwargs Any

Additional arguments (objective, description, metadata)

{}
Source code in src/quartumse/observables/suites.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
@classmethod
def from_pauli_strings(
    cls,
    name: str,
    suite_type: SuiteType,
    pauli_strings: list[str],
    weights: dict[str, float] | None = None,
    **kwargs: Any,
) -> ObservableSuite:
    """Create suite from Pauli strings.

    Args:
        name: Suite name
        suite_type: Suite category
        pauli_strings: List of Pauli string representations
        weights: Optional dict mapping Pauli string -> coefficient
        **kwargs: Additional arguments (objective, description, metadata)
    """
    obs_set = ObservableSet.from_pauli_strings(pauli_strings)

    # Re-key weights from Pauli strings to the generated observable IDs;
    # strings with no matching observable are dropped.
    if weights:
        weights = {
            obs.observable_id: weights[obs.pauli_string]
            for obs in obs_set.observables
            if obs.pauli_string in weights
        }

    return cls(
        name=name,
        suite_type=suite_type,
        observable_set=obs_set,
        weights=weights,
        **kwargs,
    )

locality_distribution()

Distribution of Pauli weights.

Source code in src/quartumse/observables/suites.py
106
107
108
def locality_distribution(self) -> dict[int, int]:
    """Distribution of Pauli weights, delegated to the underlying set."""
    underlying = self.observable_set
    return underlying.locality_distribution()

to_dict()

Convert to dictionary for serialization.

Source code in src/quartumse/observables/suites.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def to_dict(self) -> dict[str, Any]:
    """Convert to dictionary for serialization."""
    payload: dict[str, Any] = {
        "name": self.name,
        "suite_type": self.suite_type.value,
        "objective": self.objective.value,
        "description": self.description,
        "n_observables": self.n_observables,
        "n_qubits": self.n_qubits,
        "weights": self.weights,
    }
    # Nested/derived fields appended last to keep key order identical.
    payload["observable_set"] = self.observable_set.to_dict()
    payload["locality_distribution"] = self.locality_distribution()
    payload["metadata"] = self.metadata
    return payload

ParquetReader

Reader for partitioned Parquet results.

Example

reader = ParquetReader("results/run_001")
result_set = reader.read_long_form()
summary = reader.read_summary()
manifest = reader.read_manifest()

Source code in src/quartumse/io/parquet_io.py
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
class ParquetReader:
    """Reader for partitioned Parquet results.

    Example:
        reader = ParquetReader("results/run_001")
        result_set = reader.read_long_form()
        summary = reader.read_summary()
        manifest = reader.read_manifest()
    """

    def __init__(self, input_dir: str | Path) -> None:
        """Initialize reader.

        Args:
            input_dir: Root directory for this run's results.
        """
        # Fail fast if parquet support is unavailable.
        _check_parquet_available()
        self.input_dir = Path(input_dir)

    def read_long_form(
        self,
        filters: list[tuple[str, str, Any]] | None = None,
    ) -> LongFormResultSet:
        """Read long-form results from Parquet.

        Args:
            filters: Optional pyarrow filters, e.g.,
                [("protocol_id", "=", "direct_naive")]

        Returns:
            LongFormResultSet containing the results.

        Raises:
            FileNotFoundError: If the long_form directory does not exist.
        """
        long_form_dir = self.input_dir / "long_form"

        if not long_form_dir.exists():
            raise FileNotFoundError(f"Long-form results not found: {long_form_dir}")

        # Read dataset with optional filters
        dataset = pq.ParquetDataset(str(long_form_dir), filters=filters)
        df = dataset.read().to_pandas()

        # Convert to LongFormRow objects using vectorized approach (faster than iterrows)
        # Replace NaN with None across all columns first
        df = df.where(pd.notna(df), None)
        records = df.to_dict("records")
        rows = [LongFormRow(**record) for record in records]

        return LongFormResultSet(rows)

    def read_long_form_df(
        self,
        filters: list[tuple[str, str, Any]] | None = None,
    ) -> pd.DataFrame:
        """Read long-form results as DataFrame.

        Args:
            filters: Optional pyarrow filters.

        Returns:
            pandas DataFrame with results.

        Raises:
            FileNotFoundError: If the long_form directory does not exist.
        """
        long_form_dir = self.input_dir / "long_form"

        if not long_form_dir.exists():
            raise FileNotFoundError(f"Long-form results not found: {long_form_dir}")

        dataset = pq.ParquetDataset(str(long_form_dir), filters=filters)
        return dataset.read().to_pandas()

    def read_summary(self) -> list[SummaryRow]:
        """Read summary table from Parquet.

        Returns:
            List of SummaryRow objects.

        Raises:
            FileNotFoundError: If summary.parquet is missing.
        """
        summary_path = self.input_dir / "summary.parquet"

        if not summary_path.exists():
            raise FileNotFoundError(f"Summary not found: {summary_path}")

        df = pq.read_table(str(summary_path)).to_pandas()

        # Use vectorized approach (faster than iterrows)
        df = df.where(pd.notna(df), None)
        records = df.to_dict("records")
        rows = [SummaryRow(**record) for record in records]

        return rows

    def read_summary_df(self) -> pd.DataFrame:
        """Read summary table as DataFrame.

        Returns:
            pandas DataFrame with summary.

        Raises:
            FileNotFoundError: If summary.parquet is missing.
        """
        summary_path = self.input_dir / "summary.parquet"

        if not summary_path.exists():
            raise FileNotFoundError(f"Summary not found: {summary_path}")

        return pq.read_table(str(summary_path)).to_pandas()

    def read_task_results(self) -> list[TaskResult]:
        """Read task results from Parquet.

        Returns:
            List of TaskResult objects.

        Raises:
            FileNotFoundError: If task_results.parquet is missing.
        """
        task_path = self.input_dir / "task_results.parquet"

        if not task_path.exists():
            raise FileNotFoundError(f"Task results not found: {task_path}")

        df = pq.read_table(str(task_path)).to_pandas()

        # Use vectorized approach (faster than iterrows)
        df = df.where(pd.notna(df), None)
        records = df.to_dict("records")
        rows = []
        for record in records:
            # The "outputs" column is stored as a JSON string; decode it
            # back into a dict before constructing the TaskResult.
            if "outputs" in record and record["outputs"]:
                record["outputs"] = json.loads(record["outputs"])
            rows.append(TaskResult(**record))

        return rows

    def read_manifest(self) -> RunManifest:
        """Read run manifest from JSON.

        Returns:
            RunManifest object.

        Raises:
            FileNotFoundError: If manifest.json is missing.
        """
        manifest_path = self.input_dir / "manifest.json"

        if not manifest_path.exists():
            raise FileNotFoundError(f"Manifest not found: {manifest_path}")

        with open(manifest_path) as f:
            data = json.load(f)

        # Parse ISO timestamp fields. Use .get() so a manifest missing an
        # optional timestamp key entirely (e.g. completed_at for an
        # interrupted run) does not raise KeyError.
        for key in ["created_at", "completed_at"]:
            if data.get(key) is not None:
                data[key] = datetime.fromisoformat(data[key])

        return RunManifest(**data)

    def list_protocols(self) -> list[str]:
        """List available protocols in the dataset.

        Returns:
            Sorted list of protocol IDs ([] if no long_form directory).
        """
        long_form_dir = self.input_dir / "long_form"

        if not long_form_dir.exists():
            return []

        # Hive-style partition directories are named "protocol_id=<value>"
        protocols = []
        for path in long_form_dir.iterdir():
            if path.is_dir() and path.name.startswith("protocol_id="):
                protocols.append(path.name.split("=", 1)[1])

        return sorted(protocols)

    def list_circuits(self, protocol_id: str | None = None) -> list[str]:
        """List available circuits in the dataset.

        Args:
            protocol_id: Optional filter by protocol.

        Returns:
            Sorted list of circuit IDs ([] if no long_form directory).
        """
        long_form_dir = self.input_dir / "long_form"

        if not long_form_dir.exists():
            return []

        circuits = set()

        if protocol_id:
            # Scan only the requested protocol partition.
            protocol_dir = long_form_dir / f"protocol_id={protocol_id}"
            if protocol_dir.exists():
                for path in protocol_dir.iterdir():
                    if path.is_dir() and path.name.startswith("circuit_id="):
                        circuits.add(path.name.split("=", 1)[1])
        else:
            # Walk every protocol partition and union the circuit IDs.
            for protocol_path in long_form_dir.iterdir():
                if protocol_path.is_dir():
                    for path in protocol_path.iterdir():
                        if path.is_dir() and path.name.startswith("circuit_id="):
                            circuits.add(path.name.split("=", 1)[1])

        return sorted(circuits)

__init__(input_dir)

Initialize reader.

Parameters:

Name Type Description Default
input_dir str | Path

Root directory for this run's results.

required
Source code in src/quartumse/io/parquet_io.py
250
251
252
253
254
255
256
257
def __init__(self, input_dir: str | Path) -> None:
    """Initialize reader.

    Args:
        input_dir: Root directory for this run's results.
    """
    # Check parquet support up front (presumably raises if the optional
    # parquet dependencies are missing -- confirm in _check_parquet_available).
    _check_parquet_available()
    self.input_dir = Path(input_dir)

list_circuits(protocol_id=None)

List available circuits in the dataset.

Parameters:

Name Type Description Default
protocol_id str | None

Optional filter by protocol.

None

Returns:

Type Description
list[str]

List of circuit IDs.

Source code in src/quartumse/io/parquet_io.py
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
def list_circuits(self, protocol_id: str | None = None) -> list[str]:
    """List available circuits in the dataset.

    Args:
        protocol_id: Optional filter by protocol.

    Returns:
        Sorted list of circuit IDs ([] if no long_form directory).
    """
    root = self.input_dir / "long_form"
    if not root.exists():
        return []

    # Decide which protocol partition directories to scan: a single one
    # when a filter is given, otherwise every subdirectory of long_form.
    if protocol_id:
        scan_dirs = [root / f"protocol_id={protocol_id}"]
    else:
        scan_dirs = [p for p in root.iterdir() if p.is_dir()]

    found: set[str] = set()
    prefix = "circuit_id="
    for directory in scan_dirs:
        if not directory.exists():
            continue
        for entry in directory.iterdir():
            if entry.is_dir() and entry.name.startswith(prefix):
                found.add(entry.name.split("=", 1)[1])

    return sorted(found)

list_protocols()

List available protocols in the dataset.

Returns:

Type Description
list[str]

List of protocol IDs.

Source code in src/quartumse/io/parquet_io.py
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
def list_protocols(self) -> list[str]:
    """List available protocols in the dataset.

    Returns:
        Sorted list of protocol IDs ([] if no long_form directory).
    """
    root = self.input_dir / "long_form"
    if not root.exists():
        return []

    # Hive-style partition directories are named "protocol_id=<value>".
    tag = "protocol_id="
    found = [
        child.name.split("=", 1)[1]
        for child in root.iterdir()
        if child.is_dir() and child.name.startswith(tag)
    ]
    return sorted(found)

read_long_form(filters=None)

Read long-form results from Parquet.

Parameters:

Name Type Description Default
filters list[tuple[str, str, Any]] | None

Optional pyarrow filters, e.g., [("protocol_id", "=", "direct_naive")]

None

Returns:

Type Description
LongFormResultSet

LongFormResultSet containing the results.

Source code in src/quartumse/io/parquet_io.py
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
def read_long_form(
    self,
    filters: list[tuple[str, str, Any]] | None = None,
) -> LongFormResultSet:
    """Read long-form results from Parquet.

    Args:
        filters: Optional pyarrow filters, e.g.,
            [("protocol_id", "=", "direct_naive")]

    Returns:
        LongFormResultSet containing the results.

    Raises:
        FileNotFoundError: If the long_form directory does not exist.
    """
    long_form_dir = self.input_dir / "long_form"
    if not long_form_dir.exists():
        raise FileNotFoundError(f"Long-form results not found: {long_form_dir}")

    # Load the (optionally filtered) dataset into a DataFrame.
    frame = pq.ParquetDataset(str(long_form_dir), filters=filters).read().to_pandas()

    # Replace NaN with None, then build rows from plain record dicts --
    # much faster than DataFrame.iterrows.
    frame = frame.where(pd.notna(frame), None)
    rows = [LongFormRow(**record) for record in frame.to_dict("records")]
    return LongFormResultSet(rows)

read_long_form_df(filters=None)

Read long-form results as DataFrame.

Parameters:

Name Type Description Default
filters list[tuple[str, str, Any]] | None

Optional pyarrow filters.

None

Returns:

Type Description
DataFrame

pandas DataFrame with results.

Source code in src/quartumse/io/parquet_io.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def read_long_form_df(
    self,
    filters: list[tuple[str, str, Any]] | None = None,
) -> pd.DataFrame:
    """Read long-form results as DataFrame.

    Args:
        filters: Optional pyarrow filters.

    Returns:
        pandas DataFrame with results.

    Raises:
        FileNotFoundError: If the long_form directory does not exist.
    """
    long_form_dir = self.input_dir / "long_form"
    if not long_form_dir.exists():
        raise FileNotFoundError(f"Long-form results not found: {long_form_dir}")

    table = pq.ParquetDataset(str(long_form_dir), filters=filters).read()
    return table.to_pandas()

read_manifest()

Read run manifest from JSON.

Returns:

Type Description
RunManifest

RunManifest object.

Source code in src/quartumse/io/parquet_io.py
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
def read_manifest(self) -> RunManifest:
    """Read run manifest from JSON.

    Returns:
        RunManifest object.

    Raises:
        FileNotFoundError: If manifest.json is missing.
    """
    manifest_path = self.input_dir / "manifest.json"

    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest not found: {manifest_path}")

    with open(manifest_path) as f:
        data = json.load(f)

    # Parse ISO timestamp fields. Use .get() so a manifest missing an
    # optional timestamp key entirely (e.g. completed_at for an
    # interrupted run) does not raise KeyError.
    for key in ["created_at", "completed_at"]:
        if data.get(key) is not None:
            data[key] = datetime.fromisoformat(data[key])

    return RunManifest(**data)

read_summary()

Read summary table from Parquet.

Returns:

Type Description
list[SummaryRow]

List of SummaryRow objects.

Source code in src/quartumse/io/parquet_io.py
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
def read_summary(self) -> list[SummaryRow]:
    """Read summary table from Parquet.

    Returns:
        List of SummaryRow objects.

    Raises:
        FileNotFoundError: If summary.parquet is missing.
    """
    summary_path = self.input_dir / "summary.parquet"
    if not summary_path.exists():
        raise FileNotFoundError(f"Summary not found: {summary_path}")

    frame = pq.read_table(str(summary_path)).to_pandas()
    # NaN -> None before constructing rows; building from record dicts is
    # much faster than iterating with DataFrame.iterrows.
    frame = frame.where(pd.notna(frame), None)
    return [SummaryRow(**record) for record in frame.to_dict("records")]

read_summary_df()

Read summary table as DataFrame.

Returns:

Type Description
DataFrame

pandas DataFrame with summary.

Source code in src/quartumse/io/parquet_io.py
329
330
331
332
333
334
335
336
337
338
339
340
def read_summary_df(self) -> pd.DataFrame:
    """Read summary table as DataFrame.

    Returns:
        pandas DataFrame with summary.

    Raises:
        FileNotFoundError: If summary.parquet is missing.
    """
    summary_path = self.input_dir / "summary.parquet"
    if not summary_path.exists():
        raise FileNotFoundError(f"Summary not found: {summary_path}")
    table = pq.read_table(str(summary_path))
    return table.to_pandas()

read_task_results()

Read task results from Parquet.

Returns:

Type Description
list[TaskResult]

List of TaskResult objects.

Source code in src/quartumse/io/parquet_io.py
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
def read_task_results(self) -> list[TaskResult]:
    """Read task results from Parquet.

    Returns:
        List of TaskResult objects.

    Raises:
        FileNotFoundError: If task_results.parquet is missing.
    """
    task_path = self.input_dir / "task_results.parquet"
    if not task_path.exists():
        raise FileNotFoundError(f"Task results not found: {task_path}")

    frame = pq.read_table(str(task_path)).to_pandas()
    # NaN -> None so dataclass fields receive proper nulls.
    frame = frame.where(pd.notna(frame), None)

    results = []
    for record in frame.to_dict("records"):
        # The "outputs" column is stored as a JSON string; decode it back
        # into a dict before constructing the TaskResult.
        outputs = record.get("outputs")
        if outputs:
            record["outputs"] = json.loads(outputs)
        results.append(TaskResult(**record))
    return results

ParquetWriter

Writer for partitioned Parquet output.

Example

writer = ParquetWriter("results/run_001")
writer.write_long_form(result_set)
writer.write_summary(summary_rows)
writer.write_manifest(manifest)

Source code in src/quartumse/io/parquet_io.py
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
class ParquetWriter:
    """Writer for partitioned Parquet output.

    Example:
        writer = ParquetWriter("results/run_001")
        writer.write_long_form(result_set)
        writer.write_summary(summary_rows)
        writer.write_manifest(manifest)
    """

    def __init__(self, output_dir: str | Path) -> None:
        """Initialize writer.

        Args:
            output_dir: Root output directory for this run.
        """
        # Fail fast if the optional parquet backend is unavailable
        # (presumably raises when pyarrow is missing — see _check_parquet_available).
        _check_parquet_available()
        self.output_dir = Path(output_dir)
        # Create the run directory eagerly so later writes cannot fail on a missing path.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _jsonify_dict_columns(df: pd.DataFrame) -> pd.DataFrame:
        """Serialize dict-valued cells to JSON strings, in place.

        Arrow cannot represent empty structs, so any column containing dicts
        is converted to JSON text before writing. Shared by write_long_form()
        and write_summary(). Returns ``df`` for call-site convenience.
        """
        for col in df.columns:
            if df[col].apply(lambda x: isinstance(x, dict)).any():
                df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, dict) else x)
        return df

    def write_long_form(
        self,
        result_set: LongFormResultSet,
        partitioned: bool | None = None,
    ) -> Path:
        """Write long-form results to Parquet.

        Args:
            result_set: Collection of LongFormRow objects.
            partitioned: If True, partition by protocol/circuit/N_total.
                        If None, auto-detect (disabled on Windows due to path length limits).

        Returns:
            Path to the written file or directory.

        Raises:
            ValueError: If the result set is empty.
        """
        if len(result_set) == 0:
            raise ValueError("Cannot write empty result set")

        # Auto-detect: disable partitioning on Windows to avoid path length issues
        if partitioned is None:
            partitioned = not IS_WINDOWS

        # Convert to DataFrame
        df = pd.DataFrame(result_set.to_dicts())

        # Convert job timestamp columns to pandas datetimes where present.
        datetime_cols = [
            "job_submitted_at",
            "job_started_at",
            "job_completed_at",
        ]
        for col in datetime_cols:
            if col in df.columns:
                df[col] = pd.to_datetime(df[col])

        df = self._jsonify_dict_columns(df)

        long_form_dir = self.output_dir / "long_form"

        if partitioned:
            # Write hive-style partitioned dataset.
            partition_cols = ["protocol_id", "circuit_id", "N_total"]
            pq.write_to_dataset(
                pa.Table.from_pandas(df),
                root_path=str(long_form_dir),
                partition_cols=partition_cols,
            )
            return long_form_dir
        else:
            # Write single file
            output_path = long_form_dir / "data.parquet"
            long_form_dir.mkdir(parents=True, exist_ok=True)
            pq.write_table(pa.Table.from_pandas(df), str(output_path))
            return output_path

    def write_summary(self, summary_rows: list[SummaryRow]) -> Path:
        """Write summary table to Parquet.

        Args:
            summary_rows: List of SummaryRow objects.

        Returns:
            Path to the written file.

        Raises:
            ValueError: If the summary is empty.
        """
        if not summary_rows:
            raise ValueError("Cannot write empty summary")

        df = pd.DataFrame([row.model_dump() for row in summary_rows])
        df = self._jsonify_dict_columns(df)

        output_path = self.output_dir / "summary.parquet"
        pq.write_table(pa.Table.from_pandas(df), str(output_path))
        return output_path

    def write_task_results(self, task_results: list[TaskResult]) -> Path:
        """Write task results to Parquet.

        Args:
            task_results: List of TaskResult objects.

        Returns:
            Path to the written file.

        Raises:
            ValueError: If the list is empty.
        """
        if not task_results:
            raise ValueError("Cannot write empty task results")

        # Flatten each result; the outputs dict is kept as a JSON string so
        # the parquet schema stays stable across heterogeneous tasks.
        records = []
        for result in task_results:
            record = result.model_dump()
            record["outputs"] = json.dumps(record["outputs"])
            records.append(record)

        df = pd.DataFrame(records)
        output_path = self.output_dir / "task_results.parquet"
        pq.write_table(pa.Table.from_pandas(df), str(output_path))
        return output_path

    def write_manifest(self, manifest: RunManifest) -> Path:
        """Write run manifest to JSON.

        Args:
            manifest: RunManifest object.

        Returns:
            Path to the written file.
        """
        output_path = self.output_dir / "manifest.json"

        # Fill in output paths discovered on disk, then validate completeness.
        self._populate_manifest_paths(manifest)
        manifest.validate_required_fields()

        # JSON cannot carry datetimes; store them as ISO-8601 strings.
        data = manifest.model_dump()
        for key in ["created_at", "completed_at"]:
            if data[key] is not None:
                data[key] = data[key].isoformat()

        with open(output_path, "w") as f:
            json.dump(data, f, indent=2)

        return output_path

    def write_raw_shots(self, raw_records: list[dict]) -> Path:
        """Write raw shot-level data to a separate parquet file.

        Each record should contain:
        - protocol_id, circuit_id, N_total, replicate_id, noise_profile: identifiers
        - setting_id: which measurement setting
        - bitstrings: JSON-serialized list of bitstring outcomes
        - measurement_bases: JSON-serialized basis choices (null for direct protocols)

        Args:
            raw_records: List of dicts, one per (config, setting) combination.

        Returns:
            Path to the written parquet file.

        Raises:
            ValueError: If the record list is empty.
        """
        if not raw_records:
            raise ValueError("Cannot write empty raw shots")

        df = pd.DataFrame(raw_records)

        raw_shots_dir = self.output_dir / "raw_shots"
        raw_shots_dir.mkdir(parents=True, exist_ok=True)
        output_path = raw_shots_dir / "data.parquet"
        pq.write_table(pa.Table.from_pandas(df), str(output_path))
        return output_path

    def _populate_manifest_paths(self, manifest: RunManifest) -> None:
        # Back-fill manifest paths for any output that exists on disk and
        # has not already been set by the caller.
        long_form_dir = self.output_dir / "long_form"
        summary_path = self.output_dir / "summary.parquet"
        task_results_path = self.output_dir / "task_results.parquet"
        plots_dir = self.output_dir / "plots"

        if manifest.long_form_path is None and long_form_dir.exists():
            manifest.long_form_path = str(long_form_dir)
        if manifest.summary_path is None and summary_path.exists():
            manifest.summary_path = str(summary_path)
        if manifest.task_results_path is None and task_results_path.exists():
            manifest.task_results_path = str(task_results_path)
        if manifest.plots_dir is None and plots_dir.exists():
            manifest.plots_dir = str(plots_dir)

__init__(output_dir)

Initialize writer.

Parameters:

Name Type Description Default
output_dir str | Path

Root output directory for this run.

required
Source code in src/quartumse/io/parquet_io.py
57
58
59
60
61
62
63
64
65
def __init__(self, output_dir: str | Path) -> None:
    """Initialize writer.

    Args:
        output_dir: Root output directory for this run.
    """
    # Fail fast if the optional parquet backend is unavailable
    # (presumably raises when pyarrow is missing — confirm in _check_parquet_available).
    _check_parquet_available()
    self.output_dir = Path(output_dir)
    # Create the run directory eagerly so later writes cannot fail on a missing path.
    self.output_dir.mkdir(parents=True, exist_ok=True)

write_long_form(result_set, partitioned=None)

Write long-form results to Parquet.

Parameters:

Name Type Description Default
result_set LongFormResultSet

Collection of LongFormRow objects.

required
partitioned bool | None

If True, partition by protocol/circuit/N_total. If None, auto-detect (disabled on Windows due to path length limits).

None

Returns:

Type Description
Path

Path to the written file or directory.

Source code in src/quartumse/io/parquet_io.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def write_long_form(
    self,
    result_set: LongFormResultSet,
    partitioned: bool | None = None,
) -> Path:
    """Persist long-form results as Parquet.

    Args:
        result_set: Collection of LongFormRow objects.
        partitioned: If True, partition by protocol/circuit/N_total.
                    If None, auto-detect (disabled on Windows due to path length limits).

    Returns:
        Path to the written file or directory.

    Raises:
        ValueError: If the result set is empty.
    """
    if len(result_set) == 0:
        raise ValueError("Cannot write empty result set")

    # Windows path-length limits make hive-style partitioning fragile there.
    if partitioned is None:
        partitioned = not IS_WINDOWS

    frame = pd.DataFrame(result_set.to_dicts())

    # Normalize job timestamps to pandas datetimes where present.
    for ts_col in ("job_submitted_at", "job_started_at", "job_completed_at"):
        if ts_col in frame.columns:
            frame[ts_col] = pd.to_datetime(frame[ts_col])

    # Dict-valued cells become JSON strings (Arrow can't handle empty structs).
    for name in frame.columns:
        if frame[name].apply(lambda v: isinstance(v, dict)).any():
            frame[name] = frame[name].apply(
                lambda v: json.dumps(v) if isinstance(v, dict) else v
            )

    target_dir = self.output_dir / "long_form"

    if not partitioned:
        # Single-file layout.
        target_dir.mkdir(parents=True, exist_ok=True)
        file_path = target_dir / "data.parquet"
        pq.write_table(pa.Table.from_pandas(frame), str(file_path))
        return file_path

    # Hive-partitioned dataset layout.
    pq.write_to_dataset(
        pa.Table.from_pandas(frame),
        root_path=str(target_dir),
        partition_cols=["protocol_id", "circuit_id", "N_total"],
    )
    return target_dir

write_manifest(manifest)

Write run manifest to JSON.

Parameters:

Name Type Description Default
manifest RunManifest

RunManifest object.

required

Returns:

Type Description
Path

Path to the written file.

Source code in src/quartumse/io/parquet_io.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def write_manifest(self, manifest: RunManifest) -> Path:
    """Serialize the run manifest to ``manifest.json``.

    Args:
        manifest: RunManifest object.

    Returns:
        Path to the written file.
    """
    manifest_path = self.output_dir / "manifest.json"

    # Fill in output paths discovered on disk, then validate completeness.
    self._populate_manifest_paths(manifest)
    manifest.validate_required_fields()

    payload = manifest.model_dump()
    # JSON cannot carry datetimes; store them as ISO-8601 strings.
    for ts_field in ["created_at", "completed_at"]:
        if payload[ts_field] is not None:
            payload[ts_field] = payload[ts_field].isoformat()

    with open(manifest_path, "w") as fh:
        json.dump(payload, fh, indent=2)

    return manifest_path

write_raw_shots(raw_records)

Write raw shot-level data to a separate parquet file.

Each record should contain:

- protocol_id, circuit_id, N_total, replicate_id, noise_profile: identifiers
- setting_id: which measurement setting
- bitstrings: JSON-serialized list of bitstring outcomes
- measurement_bases: JSON-serialized basis choices (null for direct protocols)

Parameters:

Name Type Description Default
raw_records list[dict]

List of dicts, one per (config, setting) combination.

required

Returns:

Type Description
Path

Path to the written parquet file.

Source code in src/quartumse/io/parquet_io.py
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def write_raw_shots(self, raw_records: list[dict]) -> Path:
    """Write raw shot-level data to a separate parquet file.

    Each record should contain:
    - protocol_id, circuit_id, N_total, replicate_id, noise_profile: identifiers
    - setting_id: which measurement setting
    - bitstrings: JSON-serialized list of bitstring outcomes
    - measurement_bases: JSON-serialized basis choices (null for direct protocols)

    Args:
        raw_records: List of dicts, one per (config, setting) combination.

    Returns:
        Path to the written parquet file.

    Raises:
        ValueError: If the record list is empty.
    """
    if not raw_records:
        raise ValueError("Cannot write empty raw shots")

    shots_dir = self.output_dir / "raw_shots"
    shots_dir.mkdir(parents=True, exist_ok=True)

    table = pa.Table.from_pandas(pd.DataFrame(raw_records))
    destination = shots_dir / "data.parquet"
    pq.write_table(table, str(destination))
    return destination

write_summary(summary_rows)

Write summary table to Parquet.

Parameters:

Name Type Description Default
summary_rows list[SummaryRow]

List of SummaryRow objects.

required

Returns:

Type Description
Path

Path to the written file.

Source code in src/quartumse/io/parquet_io.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def write_summary(self, summary_rows: list[SummaryRow]) -> Path:
    """Persist the summary table as ``summary.parquet``.

    Args:
        summary_rows: List of SummaryRow objects.

    Returns:
        Path to the written file.

    Raises:
        ValueError: If the summary is empty.
    """
    if not summary_rows:
        raise ValueError("Cannot write empty summary")

    frame = pd.DataFrame([entry.model_dump() for entry in summary_rows])

    # Dict-valued cells are stored as JSON text (Arrow can't handle empty structs).
    for name in frame.columns:
        if frame[name].apply(lambda v: isinstance(v, dict)).any():
            frame[name] = frame[name].apply(
                lambda v: json.dumps(v) if isinstance(v, dict) else v
            )

    destination = self.output_dir / "summary.parquet"
    pq.write_table(pa.Table.from_pandas(frame), str(destination))
    return destination

write_task_results(task_results)

Write task results to Parquet.

Parameters:

Name Type Description Default
task_results list[TaskResult]

List of TaskResult objects.

required

Returns:

Type Description
Path

Path to the written file.

Source code in src/quartumse/io/parquet_io.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
def write_task_results(self, task_results: list[TaskResult]) -> Path:
    """Persist task results as ``task_results.parquet``.

    Args:
        task_results: List of TaskResult objects.

    Returns:
        Path to the written file.

    Raises:
        ValueError: If the list is empty.
    """
    if not task_results:
        raise ValueError("Cannot write empty task results")

    # Dump each result; the outputs dict stays a JSON string so the parquet
    # schema remains stable across heterogeneous tasks.
    rows = []
    for item in task_results:
        dumped = item.model_dump()
        dumped["outputs"] = json.dumps(dumped["outputs"])
        rows.append(dumped)

    destination = self.output_dir / "task_results.parquet"
    pq.write_table(pa.Table.from_pandas(pd.DataFrame(rows)), str(destination))
    return destination

PosthocBenchmarkResult dataclass

Complete results from a post-hoc benchmark.

Source code in src/quartumse/analysis/posthoc_benchmark.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
@dataclass
class PosthocBenchmarkResult:
    """Complete results from a post-hoc benchmark.

    Bundles library sizing, per-protocol cost accounting, headline shot
    totals, break-even analysis, and coverage-at-budget results for a
    shadows-vs-direct comparison.
    """

    # Library info
    library_size: int
    n_rounds: int
    observables_per_round: int

    # Per-protocol accounting (None until the corresponding protocol is analyzed)
    shadows_costs: PosthocCostAccounting | None = None
    direct_costs: PosthocCostAccounting | None = None

    # Summary metrics
    shadows_total_shots: int = 0
    direct_total_shots: int = 0
    shot_savings_factor: float = 1.0  # direct_shots / shadows_shots

    # Break-even analysis
    breakeven_round: int | None = None  # Round where shadows becomes cheaper
    breakeven_observables: int | None = None

    # Coverage at fixed budgets
    coverage_at_budgets: list[CoverageAtBudget] = field(default_factory=list)

Protocol

Bases: ABC

Abstract base class for measurement protocols (§5.1).

All protocols must implement this interface. The five-method contract supports both static and adaptive protocols:

Static protocols:

- next_plan() returns a single plan consuming the full budget
- update() only accumulates data
- finalize() performs one-shot estimation

Adaptive protocols:

- next_plan() may be called multiple times
- update() may adjust internal state based on accumulated data
- Can implement early stopping via state.converged

Subclasses must implement:

- protocol_id (class attribute): Unique identifier
- protocol_version (class attribute): Semantic version
- initialize(): Set up initial state
- next_plan(): Generate measurement plan
- acquire(): Execute measurements
- update(): Update state with new data
- finalize(): Produce final estimates

Source code in src/quartumse/protocols/base.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
class Protocol(ABC):
    """Abstract base class for measurement protocols (§5.1).

    All protocols must implement this interface. The five-method contract
    supports both static and adaptive protocols:

    Static protocols:
    - next_plan() returns a single plan consuming the full budget
    - update() only accumulates data
    - finalize() performs one-shot estimation

    Adaptive protocols:
    - next_plan() may be called multiple times
    - update() may adjust internal state based on accumulated data
    - Can implement early stopping via state.converged

    Subclasses must implement:
    - protocol_id (class attribute): Unique identifier
    - protocol_version (class attribute): Semantic version
    - initialize(): Set up initial state
    - next_plan(): Generate measurement plan
    - acquire(): Execute measurements
    - update(): Update state with new data
    - finalize(): Produce final estimates
    """

    # Subclasses must override these
    protocol_id: str = "abstract_protocol"
    protocol_version: str = "0.0.0"

    def __init__(self, config: ProtocolConfig | None = None) -> None:
        """Initialize protocol with configuration.

        Args:
            config: Protocol configuration. Uses defaults if None.
        """
        self.config = config or ProtocolConfig()

    @abstractmethod
    def initialize(
        self,
        observable_set: ObservableSet,
        total_budget: int,
        seed: int,
    ) -> ProtocolState:
        """Initialize protocol state for a new estimation task (§5.1).

        This method is called once at the start of estimation to set up
        the protocol's internal state.

        Args:
            observable_set: The set of observables to estimate.
            total_budget: Total shot budget for the estimation.
            seed: Random seed for reproducibility.

        Returns:
            Initialized ProtocolState.
        """
        ...

    @abstractmethod
    def next_plan(
        self,
        state: ProtocolState,
        remaining_budget: int,
    ) -> MeasurementPlan:
        """Generate the next measurement plan (§5.1).

        For static protocols, this returns a single plan consuming the
        full budget. For adaptive protocols, this may return partial plans
        to be executed iteratively.

        Args:
            state: Current protocol state.
            remaining_budget: Remaining shot budget.

        Returns:
            MeasurementPlan specifying settings and shot allocation.
        """
        ...

    @abstractmethod
    def acquire(
        self,
        circuit: QuantumCircuit,
        plan: MeasurementPlan,
        backend: AerSimulator | Any,
        seed: int,
        deadline: float | None = None,
    ) -> RawDatasetChunk:
        """Execute measurements according to the plan (§5.1).

        This method generates measurement circuits, executes them on the
        backend, and returns raw measurement outcomes.

        Args:
            circuit: The state preparation circuit.
            plan: Measurement plan to execute.
            backend: Quantum backend for execution.
            seed: Random seed for measurement randomness.
            deadline: Absolute time (time.time()) by which to stop.
                If None, no timeout.

        Returns:
            RawDatasetChunk containing measurement outcomes.
        """
        ...

    @abstractmethod
    def update(
        self,
        state: ProtocolState,
        data_chunk: RawDatasetChunk,
    ) -> ProtocolState:
        """Update protocol state with new measurement data (§5.1).

        For static protocols, this simply accumulates data. For adaptive
        protocols, this may update variance estimates, convergence checks,
        or other internal state used for planning.

        Args:
            state: Current protocol state.
            data_chunk: New measurement data.

        Returns:
            Updated ProtocolState.
        """
        ...

    @abstractmethod
    def finalize(
        self,
        state: ProtocolState,
        observable_set: ObservableSet,
    ) -> Estimates:
        """Produce final estimates from accumulated data (§5.1).

        This method processes all accumulated measurement data to produce
        final expectation value estimates with uncertainty quantification.

        Args:
            state: Final protocol state with all accumulated data.
            observable_set: The observables to estimate.

        Returns:
            Estimates containing per-observable results.
        """
        ...

    def run(
        self,
        circuit: QuantumCircuit,
        observable_set: ObservableSet,
        total_budget: int,
        backend: AerSimulator | Any,
        seed: int | None = None,
        timeout_s: float | None = None,
        hw_timing_profile: Any | None = None,
    ) -> Estimates:
        """Execute the full protocol pipeline.

        This is a convenience method that runs the full initialize ->
        (next_plan -> acquire -> update)* -> finalize loop.

        Args:
            circuit: State preparation circuit.
            observable_set: Observables to estimate.
            total_budget: Total shot budget.
            backend: Quantum backend.
            seed: Random seed (uses config.random_seed if None).
            timeout_s: Optional per-run timeout in seconds. If the deadline
                is exceeded, the run stops early, finalizes with partial data,
                and sets timed_out=True on the returned Estimates.
            hw_timing_profile: Optional HardwareTimingProfile for estimating
                real-device execution time.

        Returns:
            Final Estimates.
        """
        seed = seed if seed is not None else (self.config.random_seed or 42)

        deadline = (time.time() + timeout_s) if timeout_s is not None else None
        timed_out = False

        # Set up protocol state for this estimation run.
        state = self.initialize(observable_set, total_budget, seed)
        remaining = total_budget
        round_seed = seed

        start_time = time.time()
        total_aer_time = 0.0
        all_per_setting_aer_times: list[float] = []

        # Main loop (single iteration for static protocols)
        while remaining > 0 and not state.converged:
            if state.n_rounds >= self.config.max_rounds:
                break

            # Check deadline before planning
            if deadline is not None and time.time() >= deadline:
                timed_out = True
                break

            # Plan
            plan = self.next_plan(state, remaining)
            if plan.total_shots == 0:
                break

            # Acquire
            round_start = time.time()
            chunk = self.acquire(circuit, plan, backend, round_seed, deadline=deadline)
            quantum_time = time.time() - round_start

            # Extract per-setting AER times from chunk metadata
            chunk_aer_time = chunk.metadata.get("aer_simulate_s", 0.0)
            total_aer_time += chunk_aer_time
            per_setting_times = chunk.metadata.get("per_setting_aer_times_s", [])
            all_per_setting_aer_times.extend(per_setting_times)

            # Check if acquire() hit the deadline
            if chunk.metadata.get("timed_out", False):
                timed_out = True

            # Accumulate raw data for post-hoc analysis
            state.add_chunk(chunk)

            # Update
            classical_start = time.time()
            state = self.update(state, chunk)
            classical_time = time.time() - classical_start

            # Record round metadata (append if update() didn't already)
            if len(state.round_metadata) < state.n_rounds:
                state.round_metadata.append({})
            state.round_metadata[-1].update(
                {
                    "quantum_time_s": quantum_time,
                    "classical_time_s": classical_time,
                    "shots_this_round": chunk.n_shots,
                    "settings_this_round": plan.n_settings,
                }
            )

            remaining -= chunk.n_shots
            round_seed += 1

            if timed_out:
                break

        # Finalize (post-process phase)
        post_start = time.time()
        estimates = self.finalize(state, observable_set)
        post_time = time.time() - post_start
        estimates.raw_chunks = state.accumulated_data

        # Add timing and protocol info
        total_time = time.time() - start_time
        estimates.time_quantum_s = sum(m.get("quantum_time_s", 0) for m in state.round_metadata)
        estimates.time_classical_s = total_time - (estimates.time_quantum_s or 0)
        estimates.protocol_id = self.protocol_id
        estimates.protocol_version = self.protocol_version

        # Timeout info
        estimates.timed_out = timed_out
        if timed_out:
            estimates.n_shots_completed = state.total_shots_used

        # Build timing breakdown. Planning ("pre-compute") time is not
        # measured directly; it is approximated as whatever wall time is
        # not attributable to acquire() or finalize().
        acquire_wall = estimates.time_quantum_s or 0.0
        timing = TimingBreakdown(
            time_total_s=total_time,
            time_pre_compute_s=total_time - acquire_wall - post_time,
            time_acquire_wall_s=acquire_wall,
            time_aer_simulate_s=total_aer_time,
            time_post_process_s=post_time,
            per_setting_aer_times_s=all_per_setting_aer_times,
        )

        # Estimate quantum hardware time if profile provided
        if hw_timing_profile is not None:
            from quartumse.analysis.quantum_time_model import (
                estimate_quantum_hw_time,
                extract_circuit_timing_info,
            )

            circuit_info = extract_circuit_timing_info(circuit)
            timing.est_quantum_hw_s = estimate_quantum_hw_time(
                circuit_info=circuit_info,
                n_shots=state.total_shots_used,
                n_settings=estimates.n_settings or 1,
                hw_profile=hw_timing_profile,
            )

        estimates.timing_breakdown = timing

        return estimates

    def get_info(self) -> dict[str, Any]:
        """Get protocol information for manifest/logging."""
        return {
            "protocol_id": self.protocol_id,
            "protocol_version": self.protocol_version,
            "config": self.config.to_dict(),
        }

__init__(config=None)

Initialize protocol with configuration.

Parameters:

Name Type Description Default
config ProtocolConfig | None

Protocol configuration. Uses defaults if None.

None
Source code in src/quartumse/protocols/base.py
116
117
118
119
120
121
122
def __init__(self, config: ProtocolConfig | None = None) -> None:
    """Initialize protocol with configuration.

    Args:
        config: Protocol configuration. Uses defaults if None.
    """
    # A falsy/missing config falls back to the default configuration.
    self.config = config if config else ProtocolConfig()

acquire(circuit, plan, backend, seed, deadline=None) abstractmethod

Execute measurements according to the plan (§5.1).

This method generates measurement circuits, executes them on the backend, and returns raw measurement outcomes.

Parameters:

Name Type Description Default
circuit QuantumCircuit

The state preparation circuit.

required
plan MeasurementPlan

Measurement plan to execute.

required
backend AerSimulator | Any

Quantum backend for execution.

required
seed int

Random seed for measurement randomness.

required
deadline float | None

Absolute time (time.time()) by which to stop. If None, no timeout.

None

Returns:

Type Description
RawDatasetChunk

RawDatasetChunk containing measurement outcomes.

Source code in src/quartumse/protocols/base.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
@abstractmethod
def acquire(
    self,
    circuit: QuantumCircuit,
    plan: MeasurementPlan,
    backend: AerSimulator | Any,
    seed: int,
    deadline: float | None = None,
) -> RawDatasetChunk:
    """Execute measurements according to the plan (§5.1).

    This method generates measurement circuits, executes them on the
    backend, and returns raw measurement outcomes.

    Args:
        circuit: The state preparation circuit.
        plan: Measurement plan to execute.
        backend: Quantum backend for execution.
        seed: Random seed for measurement randomness.
        deadline: Absolute time (time.time()) by which to stop.
            If None, no timeout.

    Returns:
        RawDatasetChunk containing measurement outcomes.
    """
    # Abstract stub: concrete protocols implement backend execution here.
    ...

finalize(state, observable_set) abstractmethod

Produce final estimates from accumulated data (§5.1).

This method processes all accumulated measurement data to produce final expectation value estimates with uncertainty quantification.

Parameters:

Name Type Description Default
state ProtocolState

Final protocol state with all accumulated data.

required
observable_set ObservableSet

The observables to estimate.

required

Returns:

Type Description
Estimates

Estimates containing per-observable results.

Source code in src/quartumse/protocols/base.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
@abstractmethod
def finalize(
    self,
    state: ProtocolState,
    observable_set: ObservableSet,
) -> Estimates:
    """Produce final estimates from accumulated data (§5.1).

    This method processes all accumulated measurement data to produce
    final expectation value estimates with uncertainty quantification.

    Args:
        state: Final protocol state with all accumulated data.
        observable_set: The observables to estimate.

    Returns:
        Estimates containing per-observable results.
    """
    # Abstract stub: concrete protocols implement estimation here.
    ...

get_info()

Get protocol information for manifest/logging.

Source code in src/quartumse/protocols/base.py
394
395
396
397
398
399
400
def get_info(self) -> dict[str, Any]:
    """Return identifying protocol metadata for manifests and logging."""
    info: dict[str, Any] = {}
    info["protocol_id"] = self.protocol_id
    info["protocol_version"] = self.protocol_version
    info["config"] = self.config.to_dict()
    return info

initialize(observable_set, total_budget, seed) abstractmethod

Initialize protocol state for a new estimation task (§5.1).

This method is called once at the start of estimation to set up the protocol's internal state.

Parameters:

Name Type Description Default
observable_set ObservableSet

The set of observables to estimate.

required
total_budget int

Total shot budget for the estimation.

required
seed int

Random seed for reproducibility.

required

Returns:

Type Description
ProtocolState

Initialized ProtocolState.

Source code in src/quartumse/protocols/base.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
@abstractmethod
def initialize(
    self,
    observable_set: ObservableSet,
    total_budget: int,
    seed: int,
) -> ProtocolState:
    """Initialize protocol state for a new estimation task (§5.1).

    This method is called once at the start of estimation to set up
    the protocol's internal state. ``run()`` invokes it exactly once
    per call, before any planning or acquisition.

    Args:
        observable_set: The set of observables to estimate.
        total_budget: Total shot budget for the estimation.
        seed: Random seed for reproducibility.

    Returns:
        Initialized ProtocolState.
    """
    ...

next_plan(state, remaining_budget) abstractmethod

Generate the next measurement plan (§5.1).

For static protocols, this returns a single plan consuming the full budget. For adaptive protocols, this may return partial plans to be executed iteratively.

Parameters:

Name Type Description Default
state ProtocolState

Current protocol state.

required
remaining_budget int

Remaining shot budget.

required

Returns:

Type Description
MeasurementPlan

MeasurementPlan specifying settings and shot allocation.

Source code in src/quartumse/protocols/base.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
@abstractmethod
def next_plan(
    self,
    state: ProtocolState,
    remaining_budget: int,
) -> MeasurementPlan:
    """Generate the next measurement plan (§5.1).

    For static protocols, this returns a single plan consuming the
    full budget. For adaptive protocols, this may return partial plans
    to be executed iteratively.

    Returning a plan with ``total_shots == 0`` signals ``run()`` to
    stop planning further rounds.

    Args:
        state: Current protocol state.
        remaining_budget: Remaining shot budget.

    Returns:
        MeasurementPlan specifying settings and shot allocation.
    """
    ...

run(circuit, observable_set, total_budget, backend, seed=None, timeout_s=None, hw_timing_profile=None)

Execute the full protocol pipeline.

This is a convenience method that runs the full initialize -> (next_plan -> acquire -> update)* -> finalize loop.

Parameters:

Name Type Description Default
circuit QuantumCircuit

State preparation circuit.

required
observable_set ObservableSet

Observables to estimate.

required
total_budget int

Total shot budget.

required
backend AerSimulator | Any

Quantum backend.

required
seed int | None

Random seed (uses config.random_seed if None).

None
timeout_s float | None

Optional per-run timeout in seconds. If the deadline is exceeded, the run stops early, finalizes with partial data, and sets timed_out=True on the returned Estimates.

None
hw_timing_profile Any | None

Optional HardwareTimingProfile for estimating real-device execution time.

None

Returns:

Type Description
Estimates

Final Estimates.

Source code in src/quartumse/protocols/base.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
def run(
    self,
    circuit: QuantumCircuit,
    observable_set: ObservableSet,
    total_budget: int,
    backend: AerSimulator | Any,
    seed: int | None = None,
    timeout_s: float | None = None,
    hw_timing_profile: Any | None = None,
) -> Estimates:
    """Execute the full protocol pipeline.

    This is a convenience method that runs the full initialize ->
    (next_plan -> acquire -> update)* -> finalize loop, then attaches
    timing, timeout, and protocol-identity metadata to the result.

    Args:
        circuit: State preparation circuit.
        observable_set: Observables to estimate.
        total_budget: Total shot budget.
        backend: Quantum backend.
        seed: Random seed (uses config.random_seed if None).
        timeout_s: Optional per-run timeout in seconds. If the deadline
            is exceeded, the run stops early, finalizes with partial data,
            and sets timed_out=True on the returned Estimates.
        hw_timing_profile: Optional HardwareTimingProfile for estimating
            real-device execution time.

    Returns:
        Final Estimates.
    """
    seed = seed if seed is not None else (self.config.random_seed or 42)

    deadline = (time.time() + timeout_s) if timeout_s is not None else None
    timed_out = False

    state = self.initialize(observable_set, total_budget, seed)
    remaining = total_budget
    round_seed = seed

    start_time = time.time()
    total_aer_time = 0.0
    all_per_setting_aer_times: list[float] = []

    # Main loop (single iteration for static protocols).
    while remaining > 0 and not state.converged:
        if state.n_rounds >= self.config.max_rounds:
            break

        # Check deadline before planning.
        if deadline is not None and time.time() >= deadline:
            timed_out = True
            break

        # Plan; an empty plan means the protocol has nothing left to do.
        plan = self.next_plan(state, remaining)
        if plan.total_shots == 0:
            break

        # Acquire
        round_start = time.time()
        chunk = self.acquire(circuit, plan, backend, round_seed, deadline=deadline)
        quantum_time = time.time() - round_start

        # Extract per-setting AER times from chunk metadata.
        chunk_aer_time = chunk.metadata.get("aer_simulate_s", 0.0)
        total_aer_time += chunk_aer_time
        per_setting_times = chunk.metadata.get("per_setting_aer_times_s", [])
        all_per_setting_aer_times.extend(per_setting_times)

        # Check if acquire() hit the deadline.
        if chunk.metadata.get("timed_out", False):
            timed_out = True

        # Accumulate raw data for post-hoc analysis.
        state.add_chunk(chunk)

        # Update
        classical_start = time.time()
        state = self.update(state, chunk)
        classical_time = time.time() - classical_start

        # Record round metadata (append if update() didn't already).
        if len(state.round_metadata) < state.n_rounds:
            state.round_metadata.append({})
        state.round_metadata[-1].update(
            {
                "quantum_time_s": quantum_time,
                "classical_time_s": classical_time,
                "shots_this_round": chunk.n_shots,
                "settings_this_round": plan.n_settings,
            }
        )

        remaining -= chunk.n_shots
        round_seed += 1

        if timed_out:
            break

    # Finalize (post-process phase); runs even on timeout so partial
    # data still yields estimates.
    post_start = time.time()
    estimates = self.finalize(state, observable_set)
    post_time = time.time() - post_start
    estimates.raw_chunks = state.accumulated_data

    # Add timing and protocol info. NOTE(review): start_time is taken
    # after initialize(), so initialization cost is excluded from
    # total_time — confirm this is intentional.
    total_time = time.time() - start_time
    estimates.time_quantum_s = sum(m.get("quantum_time_s", 0) for m in state.round_metadata)
    estimates.time_classical_s = total_time - (estimates.time_quantum_s or 0)
    estimates.protocol_id = self.protocol_id
    estimates.protocol_version = self.protocol_version

    # Timeout info
    estimates.timed_out = timed_out
    if timed_out:
        estimates.n_shots_completed = state.total_shots_used

    # Build timing breakdown. Pre-compute time is approximated as
    # total_time - acquire_wall - post_process rather than measured
    # per round. (Removed: unused per-round pre-compute bookkeeping
    # and an unused pre_compute_total aggregate.)
    acquire_wall = estimates.time_quantum_s or 0.0
    timing = TimingBreakdown(
        time_total_s=total_time,
        time_pre_compute_s=total_time - acquire_wall - post_time,
        time_acquire_wall_s=acquire_wall,
        time_aer_simulate_s=total_aer_time,
        time_post_process_s=post_time,
        per_setting_aer_times_s=all_per_setting_aer_times,
    )

    # Estimate quantum hardware time if profile provided.
    if hw_timing_profile is not None:
        from quartumse.analysis.quantum_time_model import (
            estimate_quantum_hw_time,
            extract_circuit_timing_info,
        )

        circuit_info = extract_circuit_timing_info(circuit)
        timing.est_quantum_hw_s = estimate_quantum_hw_time(
            circuit_info=circuit_info,
            n_shots=state.total_shots_used,
            n_settings=estimates.n_settings or 1,
            hw_profile=hw_timing_profile,
        )

    estimates.timing_breakdown = timing

    return estimates

update(state, data_chunk) abstractmethod

Update protocol state with new measurement data (§5.1).

For static protocols, this simply accumulates data. For adaptive protocols, this may update variance estimates, convergence checks, or other internal state used for planning.

Parameters:

Name Type Description Default
state ProtocolState

Current protocol state.

required
data_chunk RawDatasetChunk

New measurement data.

required

Returns:

Type Description
ProtocolState

Updated ProtocolState.

Source code in src/quartumse/protocols/base.py
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
@abstractmethod
def update(
    self,
    state: ProtocolState,
    data_chunk: RawDatasetChunk,
) -> ProtocolState:
    """Update protocol state with new measurement data (§5.1).

    For static protocols, this simply accumulates data. For adaptive
    protocols, this may update variance estimates, convergence checks,
    or other internal state used for planning.

    ``run()`` appends/merges per-round timing into
    ``state.round_metadata`` after this call, so implementations may
    append their own metadata entry for the round if desired.

    Args:
        state: Current protocol state.
        data_chunk: New measurement data.

    Returns:
        Updated ProtocolState.
    """
    ...

ProvenanceManifest

High-level interface for creating and managing provenance manifests.

Source code in src/quartumse/reporting/manifest.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
class ProvenanceManifest:
    """Convenience wrapper around a manifest schema.

    Handles construction, JSON (de)serialization, tagging, result
    updates, and artifact validation for provenance manifests.
    """

    def __init__(self, schema: ManifestSchema):
        self.schema = schema

    @classmethod
    def create(
        cls,
        experiment_id: str,
        circuit_fingerprint: CircuitFingerprint,
        backend_snapshot: BackendSnapshot,
        **kwargs: Any,
    ) -> "ProvenanceManifest":
        """Construct a manifest populated with the mandatory fields."""
        return cls(
            ManifestSchema(
                experiment_id=experiment_id,
                circuit=circuit_fingerprint,
                backend=backend_snapshot,
                **kwargs,
            )
        )

    def to_json(self, path: str | Path | None = None) -> str:
        """Serialize the manifest to JSON, optionally writing it to *path*."""
        rendered = self.schema.model_dump_json(indent=2)
        if path:
            Path(path).write_text(rendered)
        return rendered

    @classmethod
    def from_json(cls, path: str | Path) -> "ProvenanceManifest":
        """Reconstruct a manifest from a JSON file on disk."""
        return cls(ManifestSchema.model_validate_json(Path(path).read_text()))

    def add_tag(self, tag: str) -> None:
        """Record *tag* unless it is already present."""
        if tag in self.schema.tags:
            return
        self.schema.tags.append(tag)

    def update_results(self, results: dict[str, Any]) -> None:
        """Merge *results* into the stored results summary."""
        self.schema.results_summary.update(results)

    def validate(self, *, require_shot_file: bool = True) -> bool:
        """Validate the manifest schema and ensure referenced artifacts exist."""
        if not require_shot_file:
            return True
        shot_path = Path(self.schema.shot_data_path)
        if not shot_path.exists():
            raise FileNotFoundError(f"Shot data referenced by manifest is missing: {shot_path}")
        return True

    def __repr__(self) -> str:
        return (
            f"ProvenanceManifest(id={self.schema.experiment_id}, "
            f"backend={self.schema.backend.backend_name}, "
            f"created={self.schema.created_at.isoformat()})"
        )

add_tag(tag)

Add a searchable tag.

Source code in src/quartumse/reporting/manifest.py
250
251
252
253
def add_tag(self, tag: str) -> None:
    """Attach *tag* to the manifest's searchable tags, skipping duplicates."""
    tags = self.schema.tags
    if tag in tags:
        return
    tags.append(tag)

create(experiment_id, circuit_fingerprint, backend_snapshot, **kwargs) classmethod

Create a new manifest with required fields.

Source code in src/quartumse/reporting/manifest.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
@classmethod
def create(
    cls,
    experiment_id: str,
    circuit_fingerprint: CircuitFingerprint,
    backend_snapshot: BackendSnapshot,
    **kwargs: Any,
) -> "ProvenanceManifest":
    """Construct a manifest populated with the mandatory fields."""
    populated = ManifestSchema(
        experiment_id=experiment_id,
        circuit=circuit_fingerprint,
        backend=backend_snapshot,
        **kwargs,
    )
    return cls(populated)

from_json(path) classmethod

Load manifest from JSON file.

Source code in src/quartumse/reporting/manifest.py
243
244
245
246
247
248
@classmethod
def from_json(cls, path: str | Path) -> "ProvenanceManifest":
    """Rehydrate a manifest from a JSON file on disk."""
    raw = Path(path).read_text()
    return cls(ManifestSchema.model_validate_json(raw))

to_json(path=None)

Export manifest as JSON.

Source code in src/quartumse/reporting/manifest.py
234
235
236
237
238
239
240
241
def to_json(self, path: str | Path | None = None) -> str:
    """Serialize the manifest as JSON; optionally persist it to *path*."""
    rendered = self.schema.model_dump_json(indent=2)
    if path:
        Path(path).write_text(rendered)
    return rendered

update_results(results)

Update the results summary.

Source code in src/quartumse/reporting/manifest.py
255
256
257
def update_results(self, results: dict[str, Any]) -> None:
    """Fold *results* into the manifest's results summary."""
    summary = self.schema.results_summary
    summary.update(results)

validate(*, require_shot_file=True)

Validate the manifest schema and ensure referenced artifacts exist.

Source code in src/quartumse/reporting/manifest.py
259
260
261
262
263
264
265
266
267
def validate(self, *, require_shot_file: bool = True) -> bool:
    """Check the manifest and confirm that referenced artifacts exist on disk."""
    if not require_shot_file:
        return True
    shot_path = Path(self.schema.shot_data_path)
    if not shot_path.exists():
        raise FileNotFoundError(f"Shot data referenced by manifest is missing: {shot_path}")
    return True

Report

Container for experiment report data.

Source code in src/quartumse/reporting/report.py
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
class Report:
    """Bundles a provenance manifest with plots and shot diagnostics."""

    def __init__(
        self,
        manifest: ProvenanceManifest,
        plots: dict[str, Any] | None = None,
        shot_diagnostics: ShotDataDiagnostics | None = None,
    ):
        self.manifest = manifest
        self.plots = plots or {}
        self.shot_diagnostics = shot_diagnostics

    def to_html(self, output_path: str | Path | None = None) -> str:
        """Render the HTML report, optionally writing it to *output_path*."""
        summary = self.manifest.schema.results_summary
        raw_metrics = summary.get("metrics") if isinstance(summary, dict) else None
        diagnostics = self.shot_diagnostics.to_dict() if self.shot_diagnostics else None
        html = Template(HTML_TEMPLATE).render(
            manifest=self.manifest.schema,
            now=datetime.now(timezone.utc).isoformat(),
            shot_diagnostics=diagnostics,
            metrics=normalise_metrics_for_report(raw_metrics),
        )
        if output_path:
            Path(output_path).write_text(html, encoding="utf-8")
        return html

    def to_pdf(self, output_path: str | Path) -> None:
        """Render the report to PDF (requires the optional weasyprint package)."""
        try:
            from weasyprint import HTML

            html_content = self.to_html()
            HTML(string=html_content).write_pdf(output_path)
        except ImportError as err:
            raise ImportError(
                "PDF generation requires weasyprint. Install with: pip install weasyprint"
            ) from err

to_html(output_path=None)

Generate HTML report.

Source code in src/quartumse/reporting/report.py
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
def to_html(self, output_path: str | Path | None = None) -> str:
    """Render the experiment report as HTML, optionally writing to *output_path*."""
    summary = self.manifest.schema.results_summary
    raw_metrics = summary.get("metrics") if isinstance(summary, dict) else None
    diagnostics = self.shot_diagnostics.to_dict() if self.shot_diagnostics else None
    html = Template(HTML_TEMPLATE).render(
        manifest=self.manifest.schema,
        now=datetime.now(timezone.utc).isoformat(),
        shot_diagnostics=diagnostics,
        metrics=normalise_metrics_for_report(raw_metrics),
    )
    if output_path:
        Path(output_path).write_text(html, encoding="utf-8")
    return html

to_pdf(output_path)

Generate PDF report (requires weasyprint).

Source code in src/quartumse/reporting/report.py
392
393
394
395
396
397
398
399
400
401
402
def to_pdf(self, output_path: str | Path) -> None:
    """Write a PDF rendering of the report (weasyprint must be installed)."""
    try:
        from weasyprint import HTML

        rendered = self.to_html()
        HTML(string=rendered).write_pdf(output_path)
    except ImportError as err:
        raise ImportError(
            "PDF generation requires weasyprint. Install with: pip install weasyprint"
        ) from err

ReportBuilder

Builder for constructing benchmark reports.

Source code in src/quartumse/viz/reports.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
class ReportBuilder:
    """Fluent builder that assembles a benchmark report section by section."""

    def __init__(self, run_id: str, title: str | None = None) -> None:
        """Set up the builder.

        Args:
            run_id: Benchmark run identifier.
            title: Report title (a default is derived from *run_id*).
        """
        self.report = BenchmarkReport(
            title=title or f"Benchmark Report: {run_id}",
            run_id=run_id,
        )

    def with_manifest(self, manifest: RunManifest) -> ReportBuilder:
        """Attach the run manifest and copy its methodology version."""
        report = self.report
        report.manifest = manifest
        report.methodology_version = manifest.methodology_version
        return self

    def add_overview_section(
        self,
        n_protocols: int,
        n_circuits: int,
        n_observables: int,
        n_replicates: int,
        n_grid: list[int],
    ) -> ReportBuilder:
        """Append an overview section summarizing the benchmark scope."""
        config_rows = [
            ["Protocols", n_protocols],
            ["Circuits", n_circuits],
            ["Observables", n_observables],
            ["Replicates", n_replicates],
            ["Shot budgets", f"{min(n_grid):,} - {max(n_grid):,}"],
        ]
        overview = ReportSection(
            title="Overview",
            content=f"This report summarizes benchmark results comparing {n_protocols} protocols "
            f"across {n_circuits} circuits with {n_observables} observables.",
            tables=[
                {
                    "title": "Benchmark Configuration",
                    "headers": ["Parameter", "Value"],
                    "rows": config_rows,
                }
            ],
        )
        self.report.add_section(overview)
        return self

    def add_summary_section(
        self,
        summaries: list[SummaryRow],
    ) -> ReportBuilder:
        """Append per-protocol aggregate standard-error statistics."""
        # Bucket the summary rows by their protocol identifier.
        by_protocol: dict[str, list[SummaryRow]] = {}
        for summary in summaries:
            by_protocol.setdefault(summary.protocol_id, []).append(summary)

        table_rows = []
        for protocol_id, group in by_protocol.items():
            mean_se = sum(item.se_mean for item in group) / len(group)
            max_se = max(item.se_max for item in group)
            table_rows.append([protocol_id, f"{mean_se:.4f}", f"{max_se:.4f}"])

        self.report.add_section(
            ReportSection(
                title="Summary Statistics",
                tables=[
                    {
                        "title": "Protocol Performance Summary",
                        "headers": ["Protocol", "Mean SE", "Max SE"],
                        "rows": table_rows,
                    }
                ],
            )
        )
        return self

    def add_task_results_section(
        self,
        task_results: list[TaskResult],
    ) -> ReportBuilder:
        """Append the per-task outcome table."""
        rows = [
            [
                result.task_id,
                result.protocol_id,
                result.N_star or "N/A",
                f"{result.ssf:.2f}×" if result.ssf else "N/A",
            ]
            for result in task_results
        ]
        self.report.add_section(
            ReportSection(
                title="Task Results",
                tables=[
                    {
                        "title": "Benchmark Task Outcomes",
                        "headers": ["Task", "Protocol", "N*", "SSF"],
                        "rows": rows,
                    }
                ],
            )
        )
        return self

    def add_figures_section(
        self,
        figure_paths: list[str],
        title: str = "Figures",
    ) -> ReportBuilder:
        """Append a section referencing the given figure files."""
        self.report.add_section(ReportSection(title=title, figures=figure_paths))
        return self

    def add_conclusions_section(
        self,
        conclusions: str,
    ) -> ReportBuilder:
        """Append a free-text conclusions section."""
        self.report.add_section(ReportSection(title="Conclusions", content=conclusions))
        return self

    def build(self) -> BenchmarkReport:
        """Finish building and return the assembled report."""
        return self.report

__init__(run_id, title=None)

Initialize report builder.

Parameters:

Name Type Description Default
run_id str

Benchmark run identifier.

required
title str | None

Report title.

None
Source code in src/quartumse/viz/reports.py
248
249
250
251
252
253
254
255
256
257
258
def __init__(self, run_id: str, title: str | None = None) -> None:
    """Initialize the report builder.

    Args:
        run_id: Benchmark run identifier.
        title: Report title (a default is derived from *run_id*).
    """
    report_title = title or f"Benchmark Report: {run_id}"
    self.report = BenchmarkReport(title=report_title, run_id=run_id)

add_conclusions_section(conclusions)

Add conclusions section.

Source code in src/quartumse/viz/reports.py
369
370
371
372
373
374
375
376
377
378
379
def add_conclusions_section(
    self,
    conclusions: str,
) -> ReportBuilder:
    """Append a free-text conclusions section to the report."""
    self.report.add_section(ReportSection(title="Conclusions", content=conclusions))
    return self

add_figures_section(figure_paths, title='Figures')

Add figures section.

Source code in src/quartumse/viz/reports.py
356
357
358
359
360
361
362
363
364
365
366
367
def add_figures_section(
    self,
    figure_paths: list[str],
    title: str = "Figures",
) -> ReportBuilder:
    """Append a section referencing the given figure files."""
    self.report.add_section(ReportSection(title=title, figures=figure_paths))
    return self

add_overview_section(n_protocols, n_circuits, n_observables, n_replicates, n_grid)

Add overview section.

Source code in src/quartumse/viz/reports.py
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
def add_overview_section(
    self,
    n_protocols: int,
    n_circuits: int,
    n_observables: int,
    n_replicates: int,
    n_grid: list[int],
) -> ReportBuilder:
    """Append an overview section summarizing the benchmark scope."""
    config_rows = [
        ["Protocols", n_protocols],
        ["Circuits", n_circuits],
        ["Observables", n_observables],
        ["Replicates", n_replicates],
        ["Shot budgets", f"{min(n_grid):,} - {max(n_grid):,}"],
    ]
    overview = ReportSection(
        title="Overview",
        content=f"This report summarizes benchmark results comparing {n_protocols} protocols "
        f"across {n_circuits} circuits with {n_observables} observables.",
        tables=[
            {
                "title": "Benchmark Configuration",
                "headers": ["Parameter", "Value"],
                "rows": config_rows,
            }
        ],
    )
    self.report.add_section(overview)
    return self

add_summary_section(summaries)

Add summary statistics section.

Source code in src/quartumse/viz/reports.py
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
def add_summary_section(
    self,
    summaries: list[SummaryRow],
) -> ReportBuilder:
    """Append per-protocol aggregate standard-error statistics."""
    # Bucket the summary rows by their protocol identifier.
    by_protocol: dict[str, list[SummaryRow]] = {}
    for summary in summaries:
        by_protocol.setdefault(summary.protocol_id, []).append(summary)

    table_rows = []
    for protocol_id, group in by_protocol.items():
        mean_se = sum(item.se_mean for item in group) / len(group)
        max_se = max(item.se_max for item in group)
        table_rows.append([protocol_id, f"{mean_se:.4f}", f"{max_se:.4f}"])

    self.report.add_section(
        ReportSection(
            title="Summary Statistics",
            tables=[
                {
                    "title": "Protocol Performance Summary",
                    "headers": ["Protocol", "Mean SE", "Max SE"],
                    "rows": table_rows,
                }
            ],
        )
    )
    return self

add_task_results_section(task_results)

Add task results section.

Source code in src/quartumse/viz/reports.py
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
def add_task_results_section(
    self,
    task_results: list[TaskResult],
) -> ReportBuilder:
    """Append the per-task outcome table."""
    rows = [
        [
            result.task_id,
            result.protocol_id,
            result.N_star or "N/A",
            f"{result.ssf:.2f}×" if result.ssf else "N/A",
        ]
        for result in task_results
    ]
    self.report.add_section(
        ReportSection(
            title="Task Results",
            tables=[
                {
                    "title": "Benchmark Task Outcomes",
                    "headers": ["Task", "Protocol", "N*", "SSF"],
                    "rows": rows,
                }
            ],
        )
    )
    return self

build()

Build and return the report.

Source code in src/quartumse/viz/reports.py
381
382
383
def build(self) -> BenchmarkReport:
    """Finish building and hand back the assembled report."""
    return self.report

with_manifest(manifest)

Add run manifest.

Source code in src/quartumse/viz/reports.py
260
261
262
263
264
def with_manifest(self, manifest: RunManifest) -> ReportBuilder:
    """Store the run manifest and propagate its methodology version."""
    target = self.report
    target.manifest = manifest
    target.methodology_version = manifest.methodology_version
    return self

ShadowConfig

Bases: BaseModel

Configuration for classical shadows estimation.

Source code in src/quartumse/shadows/config.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
class ShadowConfig(BaseModel):
    """Configuration for classical shadows estimation.

    Groups every tunable of the shadows pipeline: the algorithm version,
    the shadow (measurement) budget, noise-awareness options (v1+),
    fermionic mode (v2+), adaptive selection, robust/Bayesian estimation
    (v4+), execution settings, and variance-reduction knobs.  Each field
    carries its own ``description`` below.
    """

    # Core parameters
    version: ShadowVersion = Field(
        default=ShadowVersion.V0_BASELINE, description="Shadows algorithm version"
    )
    shadow_size: int = Field(
        default=1000, description="Number of random measurements (shadow size)"
    )
    measurement_ensemble: MeasurementEnsemble = Field(
        default=MeasurementEnsemble.RANDOM_LOCAL_CLIFFORD
    )

    # v1+ (noise-aware)
    apply_inverse_channel: bool = Field(
        default=False, description="Apply noise-aware inverse channel (v1+)"
    )
    noise_model_path: str | None = Field(None, description="Path to serialized noise model")

    # v2+ (fermionic)
    fermionic_mode: bool = Field(default=False, description="Enable fermionic shadows (v2+)")
    rdm_order: int = Field(default=1, description="RDM order for fermionic mode (1 or 2)")

    # Adaptive
    adaptive: bool = Field(default=False, description="Use adaptive measurement selection")
    target_observables: list[str] | None = Field(
        None, description="Observable strings for adaptive prioritization"
    )
    derandomization_strategy: str | None = Field(
        None, description="greedy, importance_sampling, etc."
    )

    # v4+ (robust)
    bayesian_inference: bool = Field(
        default=False, description="Enable Bayesian robust estimation (v4+)"
    )
    bootstrap_samples: int = Field(default=1000, description="Bootstrap samples for CI (v4+)")
    confidence_level: float = Field(default=0.95, description="Confidence interval level")

    # General settings
    random_seed: int | None = Field(None, description="Random seed for reproducibility")
    parallel_shots: bool = Field(
        default=True, description="Execute shadow measurements in parallel batches"
    )
    batch_size: int | None = Field(None, description="Batch size for parallel execution")

    # Variance reduction
    median_of_means: bool = Field(
        default=False, description="Use median-of-means estimator for robustness"
    )
    num_groups: int = Field(default=10, description="Number of groups for median-of-means")

    # Advanced
    custom_parameters: dict[str, Any] = Field(
        default_factory=dict, description="Version-specific custom parameters"
    )

    # use_enum_values=False keeps fields as enum members rather than their
    # raw values (pydantic ConfigDict semantics).
    model_config = ConfigDict(use_enum_values=False)

    def validate_version_compatibility(self) -> None:
        """Validate that enabled features match the selected version.

        Currently a deliberate no-op placeholder; see comment below.
        """

        # Warning: simplified validation
        # In production, this would check feature availability
        pass

validate_version_compatibility()

Validate that enabled features match the selected version.

Source code in src/quartumse/shadows/config.py
88
89
90
91
92
93
def validate_version_compatibility(self) -> None:
    """Validate that enabled features match the selected version."""
    # NOTE: intentionally a no-op for now.  A production implementation
    # would cross-check enabled feature flags against the chosen version.
    return None

ShadowEstimator

Bases: Estimator

Observable estimator using classical shadows.

Automatically selects shadow version based on config and orchestrates: 1. Shadow measurement generation 2. Circuit execution 3. Shadow reconstruction 4. Observable estimation 5. Provenance tracking

Source code in src/quartumse/estimator/shadow_estimator.py
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
class ShadowEstimator(Estimator):
    """
    Observable estimator using classical shadows.

    Automatically selects shadow version based on config and orchestrates:
    1. Shadow measurement generation
    2. Circuit execution
    3. Shadow reconstruction
    4. Observable estimation
    5. Provenance tracking
    """

    def __init__(
        self,
        backend: Backend | str,
        shadow_config: ShadowConfig | None = None,
        mitigation_config: MitigationConfig | None = None,
        data_dir: str | Path | None = None,
    ):
        """
        Initialize shadow estimator.

        Args:
            backend: Qiskit backend or backend name (e.g., "aer_simulator")
            shadow_config: Classical shadows configuration
            mitigation_config: Error mitigation configuration
            data_dir: Directory for storing shot data and manifests
        """
        # Handle backend
        self._backend_descriptor: str | None = None
        self._backend_snapshot: BackendSnapshot | None = None

        if isinstance(backend, str):
            self._backend_descriptor = backend
            # Strings containing ":" are treated as provider descriptors
            # (resolved to a live backend + snapshot); otherwise only the
            # literal "aer_simulator" is accepted.
            if ":" in backend:
                resolved_backend, snapshot = resolve_backend(backend)
                backend = resolved_backend
                self._backend_snapshot = snapshot
            elif backend == "aer_simulator":
                backend = AerSimulator()
                self._backend_snapshot = create_backend_snapshot(backend)
            else:
                raise ValueError(f"Unknown backend string: {backend}")
        else:
            self._backend_descriptor = getattr(backend, "name", None)

        super().__init__(backend, shadow_config)

        # Runtime sampler is created lazily in _get_runtime_sampler().
        self._runtime_sampler: SamplerPrimitive | None = None
        self._runtime_sampler_checked = False
        self._use_runtime_sampler = is_ibm_runtime_backend(self.backend)

        self.shadow_config = shadow_config or ShadowConfig.model_validate({})
        self.mitigation_config = mitigation_config or MitigationConfig()
        self.data_dir = Path(data_dir) if data_dir else Path("./data")
        self.data_dir.mkdir(parents=True, exist_ok=True)

        # Measurement-error mitigation (MEM) is constructed eagerly when the
        # shadows version, the inverse-channel flag, or the mitigation
        # techniques list implies readout mitigation will be needed.
        self.measurement_error_mitigation: MeasurementErrorMitigation | None = None
        self._mem_required = (
            self.shadow_config.version == ShadowVersion.V1_NOISE_AWARE
            or self.shadow_config.apply_inverse_channel
            or ("MEM" in self.mitigation_config.techniques)
        )
        if self._mem_required:
            self.measurement_error_mitigation = MeasurementErrorMitigation(self.backend)

        # Initialize shadow implementation based on version
        self.shadow_impl: ClassicalShadows = self._create_shadow_implementation()

        # Initialize shot data writer
        self.shot_data_writer = ShotDataWriter(self.data_dir)

    def _get_runtime_sampler(self) -> SamplerPrimitive | None:
        """Initialise (if necessary) and return the IBM Runtime sampler.

        Returns None when the backend is not an IBM Runtime backend; the
        sampler creation is attempted at most once (memoised).
        """

        if not self._use_runtime_sampler:
            return None

        if not self._runtime_sampler_checked:
            self._runtime_sampler = create_runtime_sampler(self.backend)
            self._runtime_sampler_checked = True

        return self._runtime_sampler

    def _create_shadow_implementation(self) -> ClassicalShadows:
        """Factory for shadow implementations.

        Raises:
            NotImplementedError: for versions v2-v4 (not yet implemented).
            ValueError: for an unrecognised version value.
        """
        version = self.shadow_config.version

        if version == ShadowVersion.V0_BASELINE:
            return RandomLocalCliffordShadows(self.shadow_config)
        elif version == ShadowVersion.V1_NOISE_AWARE:
            # Ensure a MEM instance exists even if __init__ did not create one.
            if self.measurement_error_mitigation is None:
                self.measurement_error_mitigation = MeasurementErrorMitigation(self.backend)
            return NoiseAwareRandomLocalCliffordShadows(
                self.shadow_config, self.measurement_error_mitigation
            )
        elif version == ShadowVersion.V2_FERMIONIC:
            # TODO: Implement v2
            raise NotImplementedError("Shadows v2 (fermionic) not yet implemented")
        elif version == ShadowVersion.V3_ADAPTIVE:
            # TODO: Implement adaptive shadows
            raise NotImplementedError("Adaptive shadows not yet implemented")
        elif version == ShadowVersion.V4_ROBUST:
            # TODO: Implement v4
            raise NotImplementedError("Shadows v4 (robust) not yet implemented")
        else:
            raise ValueError(f"Unknown shadow version: {version}")

    def estimate(
        self,
        circuit: QuantumCircuit,
        observables: list[Observable],
        target_precision: float | None = None,
        save_manifest: bool = True,
    ) -> EstimationResult:
        """
        Estimate observables using classical shadows.

        Workflow:
        1. Generate shadow measurement circuits
        2. Transpile and execute on backend
        3. Reconstruct shadow snapshots
        4. Estimate all observables
        5. Generate provenance manifest

        Args:
            circuit: State-preparation circuit whose observables are estimated.
            observables: Observables to estimate.
            target_precision: If given, the shadow size is grown to the
                worst-case requirement over all observables.
            save_manifest: Whether to persist a provenance manifest.

        Returns:
            EstimationResult with per-observable estimates and provenance paths.
        """
        experiment_id = str(uuid.uuid4())
        start_time = time.time()

        # Determine shadow size
        if target_precision:
            required_sizes = [
                self.shadow_impl.estimate_shadow_size_needed(obs, target_precision)
                for obs in observables
            ]
            shadow_size = max(required_sizes) if required_sizes else self.shadow_config.shadow_size
            if shadow_size <= 0:
                raise ValueError("Shadow size estimation produced a non-positive value")
            # Keep config and implementation in sync with the derived size.
            self.shadow_config.shadow_size = shadow_size
            self.shadow_impl.config.shadow_size = shadow_size
        else:
            shadow_size = self.shadow_config.shadow_size
            self.shadow_impl.config.shadow_size = shadow_size

        # Generate shadow measurement circuits
        shadow_circuits = self.shadow_impl.generate_measurement_circuits(circuit, shadow_size)

        # Calibrate measurement error mitigation if required
        if isinstance(self.shadow_impl, NoiseAwareRandomLocalCliffordShadows):
            mem_params = self.mitigation_config.parameters
            mem_shots = int(mem_params.get("mem_shots", 4096))
            mem_qubits_param = mem_params.get("mem_qubits")
            if mem_qubits_param is None:
                # Default: calibrate every qubit the circuit touches.
                mem_qubits = list(range(circuit.num_qubits))
            elif isinstance(mem_qubits_param, (list, tuple)):
                mem_qubits = [int(q) for q in mem_qubits_param]
            else:
                mem_qubits = [int(mem_qubits_param)]

            mem_force = bool(mem_params.get("mem_force_calibration", False))
            run_options = mem_params.get("mem_run_options", {})
            mem_confusion_path_str = self.mitigation_config.confusion_matrix_path

            # Prefer a previously saved confusion matrix unless recalibration
            # is forced; its metadata overrides the shots/qubits parameters.
            if mem_confusion_path_str and not mem_force:
                try:
                    self.shadow_impl.mem.load_confusion_matrix(mem_confusion_path_str)
                    metadata = self.shadow_impl.mem.get_confusion_metadata()
                    if isinstance(metadata.get("shots_per_state"), (int, float)):
                        mem_shots = int(metadata["shots_per_state"])
                        mem_params["mem_shots"] = mem_shots
                    if isinstance(metadata.get("qubits"), (list, tuple)):
                        mem_qubits = [int(q) for q in metadata["qubits"]]
                        mem_params["mem_qubits"] = mem_qubits
                except FileNotFoundError:
                    LOGGER.warning(
                        "Configured confusion matrix %s not found; recalibrating.",
                        mem_confusion_path_str,
                    )
                    mem_confusion_path_str = None

            if (
                self.shadow_impl.mem.confusion_matrix is None
                or mem_force
                or not mem_confusion_path_str
            ):
                # Calibrate now and persist the matrix under data_dir/mem.
                mem_dir = self.data_dir / "mem"
                mem_dir.mkdir(parents=True, exist_ok=True)
                confusion_matrix_path = mem_dir / f"{experiment_id}.npz"
                saved_confusion_path = self.shadow_impl.mem.calibrate(
                    mem_qubits,
                    shots=mem_shots,
                    run_options=run_options,
                    output_path=confusion_matrix_path,
                )
                mem_confusion_path = (
                    saved_confusion_path
                    if saved_confusion_path is not None
                    else confusion_matrix_path
                )
                self.mitigation_config.confusion_matrix_path = str(mem_confusion_path.resolve())
                mem_confusion_path_str = self.mitigation_config.confusion_matrix_path
                self.shadow_impl.mem.confusion_matrix_path = Path(mem_confusion_path_str)
            else:
                self.mitigation_config.confusion_matrix_path = mem_confusion_path_str

            # Record the effective MEM settings back into the mitigation config.
            if "MEM" not in self.mitigation_config.techniques:
                self.mitigation_config.techniques.append("MEM")
            mem_params["mem_qubits"] = mem_qubits
            mem_params["mem_shots"] = mem_shots

        # Transpile for backend
        transpiled_circuits = transpile(shadow_circuits, backend=self.backend)

        # Respect backend batching limits
        max_experiments = None
        backend_config = None
        if hasattr(self.backend, "configuration"):
            try:
                backend_config = self.backend.configuration()
            except Exception:
                # Best-effort: some backends raise when configuration()
                # is unavailable; fall back to the default batch size.
                backend_config = None

        if backend_config is not None:
            max_experiments = getattr(backend_config, "max_experiments", None)

        if isinstance(max_experiments, np.integer):
            max_experiments = int(max_experiments)

        if not isinstance(max_experiments, int) or max_experiments <= 0:
            # Use safe default batch size for IBM backends to avoid submission failures
            max_experiments = 500
            print(
                f"Warning: Backend max_experiments unavailable or invalid. "
                f"Using safe default batch size: {max_experiments}"
            )

        measurement_outcomes_list: list[np.ndarray] = []

        sampler = self._get_runtime_sampler()

        # Execute in batches; every shadow snapshot is a single-shot run of
        # its own randomized measurement circuit.
        for start_idx in range(0, len(transpiled_circuits), max_experiments):
            circuit_batch = transpiled_circuits[start_idx : start_idx + max_experiments]
            if sampler is not None:
                job = sampler.run(list(circuit_batch), shots=1)
                result = job.result()

                for batch_idx, _ in enumerate(circuit_batch):
                    counts = result[batch_idx].data.meas.get_counts()
                    # Counts keys list qubit 0 last; reverse so outcomes[i]
                    # corresponds to qubit i.
                    bitstring = list(counts.keys())[0].replace(" ", "")
                    outcomes = np.array([int(b) for b in bitstring[::-1]], dtype=int)
                    measurement_outcomes_list.append(outcomes)
            else:
                job = self.backend.run(circuit_batch, shots=1)  # Each circuit is one shadow
                result = job.result()

                for batch_idx, _ in enumerate(circuit_batch):
                    counts = result.get_counts(batch_idx)
                    bitstring = list(counts.keys())[0].replace(" ", "")
                    outcomes = np.array([int(b) for b in bitstring[::-1]], dtype=int)
                    measurement_outcomes_list.append(outcomes)

        if len(measurement_outcomes_list) != shadow_size:
            raise RuntimeError(
                "Collected measurement outcomes do not match the requested shadow size."
            )

        measurement_outcomes = np.asarray(measurement_outcomes_list, dtype=int)

        measurement_bases = self.shadow_impl.measurement_bases
        if measurement_bases is None:
            raise ValueError("Shadow implementation did not record measurement bases.")
        measurement_bases = np.asarray(measurement_bases, dtype=int)
        self.shadow_impl.measurement_bases = measurement_bases

        # Save shot data to Parquet
        shot_data_path = self.shot_data_writer.save_shadow_measurements(
            experiment_id=experiment_id,
            measurement_bases=measurement_bases,
            measurement_outcomes=measurement_outcomes,
            num_qubits=circuit.num_qubits,
        )

        # Reconstruct shadows
        self.shadow_impl.reconstruct_classical_shadow(measurement_outcomes, measurement_bases)

        # Estimate all observables
        estimates: dict[str, dict[str, object]] = {}
        for obs in observables:
            estimate = self.shadow_impl.estimate_observable(obs)
            estimates[str(obs)] = {
                "expectation_value": estimate.expectation_value,
                "variance": estimate.variance,
                "ci_95": estimate.confidence_interval,
                "ci_width": estimate.ci_width,
            }

        execution_time = time.time() - start_time

        # Create provenance manifest
        if save_manifest:
            manifest = self._create_manifest(
                experiment_id,
                circuit,
                observables,
                estimates,
                shadow_size,
                execution_time,
                shot_data_path,
            )
            manifest_path = self.data_dir / "manifests" / f"{experiment_id}.json"
            manifest_path.parent.mkdir(parents=True, exist_ok=True)
            manifest.to_json(manifest_path)
        else:
            manifest_path = None

        return EstimationResult(
            observables=estimates,
            shots_used=shadow_size,
            execution_time=execution_time,
            backend_name=self.backend.name,
            experiment_id=experiment_id,
            manifest_path=str(manifest_path) if manifest_path else None,
            shot_data_path=str(shot_data_path),
            mitigation_confusion_matrix_path=self.mitigation_config.confusion_matrix_path,
        )

    def estimate_shots_needed(self, observables: list[Observable], target_precision: float) -> int:
        """Estimate shadow size needed for target precision.

        Returns the worst-case (maximum) requirement over all observables;
        0 when the observables list is empty.
        """
        # Use worst-case observable
        max_shadow_size = 0
        for obs in observables:
            size = self.shadow_impl.estimate_shadow_size_needed(obs, target_precision)
            max_shadow_size = max(max_shadow_size, size)

        return max_shadow_size

    def replay_from_manifest(
        self,
        manifest_path: str | Path,
        observables: list[Observable] | None = None,
    ) -> EstimationResult:
        """
        Replay an experiment from a saved manifest and shot data.

        This allows re-estimation of observables from previously collected shot data
        without re-executing circuits on the backend.

        Args:
            manifest_path: Path to the provenance manifest JSON file
            observables: Optional new list of observables to estimate. If None,
                        uses observables from the original manifest.

        Returns:
            EstimationResult with re-estimated observables

        Raises:
            FileNotFoundError: If the manifest or a required confusion matrix
                cannot be located.
            ValueError: If the manifest lacks shadows configuration or the
                confusion-matrix archive is malformed.
            NotImplementedError: For shadow versions without replay support.
        """
        manifest_path = Path(manifest_path)
        if not manifest_path.exists():
            raise FileNotFoundError(f"Manifest not found: {manifest_path}")

        # Load manifest
        manifest = ProvenanceManifest.from_json(manifest_path)
        experiment_id = manifest.schema.experiment_id

        # Load shot data
        measurement_bases, measurement_outcomes, num_qubits = (
            self.shot_data_writer.load_shadow_measurements(experiment_id)
        )

        if manifest.schema.shadows is None:
            raise ValueError(
                "Manifest does not contain classical shadows configuration information."
            )

        # Reconstruct shadows with loaded data
        # Create temporary shadow implementation if needed
        shadow_payload = manifest.schema.shadows.model_dump()
        shadow_payload["random_seed"] = manifest.schema.random_seed
        shadow_config = ShadowConfig.model_validate(shadow_payload)

        resolved_confusion_matrix_path: str | None = (
            manifest.schema.mitigation.confusion_matrix_path
        )

        if shadow_config.version == ShadowVersion.V0_BASELINE:
            shadow_impl = RandomLocalCliffordShadows(shadow_config)
        elif shadow_config.version == ShadowVersion.V1_NOISE_AWARE:
            confusion_matrix_path_str = manifest.schema.mitigation.confusion_matrix_path

            if not confusion_matrix_path_str:
                raise FileNotFoundError(
                    "Noise-aware manifest does not include a persisted confusion matrix path. "
                    "Re-run estimation or provide the saved calibration artifact before replaying."
                )

            # The manifest may record a relative path; probe several plausible
            # roots (as-is, relative to the manifest, relative to data_dir,
            # and the conventional "mem" subdirectories).
            raw_confusion_path = Path(confusion_matrix_path_str)
            candidate_paths = [raw_confusion_path]

            if not raw_confusion_path.is_absolute():
                candidate_paths.append((manifest_path.parent / raw_confusion_path).resolve())
                candidate_paths.append((self.data_dir / raw_confusion_path).resolve())

            candidate_paths.append((self.data_dir / "mem" / raw_confusion_path.name).resolve())
            candidate_paths.append(
                (manifest_path.parent / "mem" / raw_confusion_path.name).resolve()
            )

            confusion_matrix_path: Path | None = None
            for candidate in candidate_paths:
                if candidate and candidate.exists():
                    confusion_matrix_path = candidate
                    break

            if confusion_matrix_path is None:
                raise FileNotFoundError(
                    "Unable to locate the persisted confusion matrix required for noise-aware replay. "
                    f"Looked for {raw_confusion_path} and related paths."
                )

            with np.load(confusion_matrix_path, allow_pickle=False) as archive:
                if "confusion_matrix" not in archive:
                    raise ValueError(
                        "Confusion matrix archive is missing the 'confusion_matrix' dataset."
                    )
                confusion_matrix = archive["confusion_matrix"]

            mem = MeasurementErrorMitigation(self.backend)
            mem.confusion_matrix = confusion_matrix
            mem.confusion_matrix_path = confusion_matrix_path.resolve()
            # NOTE(review): pokes a private attribute and assumes the saved
            # calibration covered all num_qubits qubits -- confirm against
            # MeasurementErrorMitigation internals.
            mem._calibrated_qubits = tuple(range(num_qubits))

            shadow_impl = NoiseAwareRandomLocalCliffordShadows(shadow_config, mem)
            resolved_confusion_matrix_path = str(confusion_matrix_path.resolve())
        else:
            raise NotImplementedError(
                f"Replay for shadow version {shadow_config.version.value} is not implemented"
            )
        shadow_impl.measurement_bases = measurement_bases
        shadow_impl.reconstruct_classical_shadow(measurement_outcomes, measurement_bases)

        # Use observables from manifest if not provided
        if observables is None:
            observables = [
                Observable(obs_dict["pauli"], obs_dict.get("coefficient", 1.0))
                for obs_dict in manifest.schema.observables
            ]

        # Estimate all observables
        estimates: dict[str, dict[str, object]] = {}
        for obs in observables:
            estimate = shadow_impl.estimate_observable(obs)
            estimates[str(obs)] = {
                "expectation_value": estimate.expectation_value,
                "variance": estimate.variance,
                "ci_95": estimate.confidence_interval,
                "ci_width": estimate.ci_width,
            }

        return EstimationResult(
            observables=estimates,
            shots_used=manifest.schema.shadows.shadow_size,
            execution_time=0.0,  # No execution time for replay
            backend_name=manifest.schema.backend.backend_name,
            experiment_id=experiment_id,
            manifest_path=str(manifest_path),
            shot_data_path=manifest.schema.shot_data_path,
            mitigation_confusion_matrix_path=resolved_confusion_matrix_path,
        )

    def _create_manifest(
        self,
        experiment_id: str,
        circuit: QuantumCircuit,
        observables: list[Observable],
        estimates: dict[str, dict[str, object]],
        shadow_size: int,
        execution_time: float,
        shot_data_path: Path,
    ) -> ProvenanceManifest:
        """Create provenance manifest for the experiment.

        Captures the circuit fingerprint, backend snapshot, shadows and
        mitigation configuration, results summary, resource usage, and
        environment versions needed for exact replay.
        """
        import sys

        import qiskit

        # Circuit fingerprint
        try:
            qasm_str = qasm3.dumps(circuit)
        except Exception:
            # Fall back to the legacy QASM 2 export if QASM 3 serialization fails.
            qasm_str = circuit.qasm()

        gate_counts: dict[str, int] = {}
        for instruction in circuit.data:
            gate_name = instruction.operation.name
            gate_counts[gate_name] = gate_counts.get(gate_name, 0) + 1

        # Truncated SHA-256 of the QASM text identifies the circuit.
        circuit_hash = hashlib.sha256(qasm_str.encode()).hexdigest()[:16]

        circuit_fp = CircuitFingerprint(
            qasm3=qasm_str,
            num_qubits=circuit.num_qubits,
            depth=circuit.depth(),
            gate_counts=gate_counts,
            circuit_hash=circuit_hash,
        )

        # Backend snapshot
        backend_snapshot = self._backend_snapshot or create_backend_snapshot(self.backend)

        # Shadows config
        shadows_config = ShadowsConfig.model_validate(
            {
                "version": self.shadow_config.version.value,
                "shadow_size": shadow_size,
                "measurement_ensemble": self.shadow_config.measurement_ensemble.value,
                "noise_model_path": self.shadow_config.noise_model_path,
                "inverse_channel_applied": self.shadow_config.apply_inverse_channel,
                "fermionic_mode": self.shadow_config.fermionic_mode,
                "rdm_order": self.shadow_config.rdm_order,
                "adaptive": self.shadow_config.adaptive,
                "target_observables": self.shadow_config.target_observables,
                "bayesian_inference": self.shadow_config.bayesian_inference,
                "bootstrap_samples": self.shadow_config.bootstrap_samples,
            }
        )

        # Resource usage
        resource_usage = ResourceUsage.model_validate(
            {
                "total_shots": shadow_size,
                "execution_time_seconds": execution_time,
                "queue_time_seconds": None,
                "estimated_cost_usd": None,
                "credits_used": None,
                "classical_compute_seconds": None,
            }
        )

        metadata = {}
        if self._backend_descriptor:
            metadata["backend_descriptor"] = self._backend_descriptor

        # Create manifest
        shot_checksum = compute_file_checksum(shot_data_path)

        # Deep-copy so checksum annotation does not mutate the live config.
        mitigation_config = self.mitigation_config.model_copy(deep=True)
        confusion_path = mitigation_config.confusion_matrix_path
        if confusion_path:
            mitigation_config.confusion_matrix_checksum = compute_file_checksum(confusion_path)

        manifest_schema = ManifestSchema(
            experiment_id=experiment_id,
            experiment_name=None,
            circuit=circuit_fp,
            observables=[
                {"pauli": obs.pauli_string, "coefficient": obs.coefficient} for obs in observables
            ],
            backend=backend_snapshot,
            mitigation=mitigation_config,
            shadows=shadows_config,
            shot_data_path=str(shot_data_path),
            shot_data_checksum=shot_checksum,
            results_summary=estimates,
            resource_usage=resource_usage,
            metadata=metadata,
            random_seed=self.shadow_config.random_seed,
            quartumse_version=__version__,
            qiskit_version=qiskit.__version__,
            python_version=f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
        )

        return ProvenanceManifest(manifest_schema)

__init__(backend, shadow_config=None, mitigation_config=None, data_dir=None)

Initialize shadow estimator.

Parameters:

Name Type Description Default
backend Backend | str

Qiskit backend or backend name (e.g., "aer_simulator")

required
shadow_config ShadowConfig | None

Classical shadows configuration

None
mitigation_config MitigationConfig | None

Error mitigation configuration

None
data_dir str | Path | None

Directory for storing shot data and manifests

None
Source code in src/quartumse/estimator/shadow_estimator.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def __init__(
    self,
    backend: Backend | str,
    shadow_config: ShadowConfig | None = None,
    mitigation_config: MitigationConfig | None = None,
    data_dir: str | Path | None = None,
):
    """
    Initialize shadow estimator.

    Args:
        backend: Qiskit backend or backend name (e.g., "aer_simulator")
        shadow_config: Classical shadows configuration
        mitigation_config: Error mitigation configuration
        data_dir: Directory for storing shot data and manifests
    """
    # Resolve the backend argument: either an instantiated backend object
    # or a descriptor string ("provider:name" or "aer_simulator").
    self._backend_descriptor: str | None = None
    self._backend_snapshot: BackendSnapshot | None = None

    if isinstance(backend, str):
        self._backend_descriptor = backend
        if ":" in backend:
            backend, self._backend_snapshot = resolve_backend(backend)
        elif backend == "aer_simulator":
            backend = AerSimulator()
            self._backend_snapshot = create_backend_snapshot(backend)
        else:
            raise ValueError(f"Unknown backend string: {backend}")
    else:
        self._backend_descriptor = getattr(backend, "name", None)

    super().__init__(backend, shadow_config)

    # Runtime-sampler state: IBM Runtime backends are driven through a
    # Sampler primitive instead of backend.run().
    self._runtime_sampler: SamplerPrimitive | None = None
    self._runtime_sampler_checked = False
    self._use_runtime_sampler = is_ibm_runtime_backend(self.backend)

    self.shadow_config = shadow_config or ShadowConfig.model_validate({})
    self.mitigation_config = mitigation_config or MitigationConfig()
    self.data_dir = Path(data_dir) if data_dir else Path("./data")
    self.data_dir.mkdir(parents=True, exist_ok=True)

    # Measurement-error mitigation is needed for noise-aware shadows, for
    # inverse-channel application, or when "MEM" is explicitly requested.
    self.measurement_error_mitigation: MeasurementErrorMitigation | None = None
    needs_mem = (
        self.shadow_config.version == ShadowVersion.V1_NOISE_AWARE
        or self.shadow_config.apply_inverse_channel
        or ("MEM" in self.mitigation_config.techniques)
    )
    self._mem_required = needs_mem
    if needs_mem:
        self.measurement_error_mitigation = MeasurementErrorMitigation(self.backend)

    # Version-specific shadow implementation plus shot-data persistence.
    self.shadow_impl: ClassicalShadows = self._create_shadow_implementation()
    self.shot_data_writer = ShotDataWriter(self.data_dir)

estimate(circuit, observables, target_precision=None, save_manifest=True)

Estimate observables using classical shadows.

Workflow:

1. Generate shadow measurement circuits
2. Transpile and execute on backend
3. Reconstruct shadow snapshots
4. Estimate all observables
5. Generate provenance manifest

Source code in src/quartumse/estimator/shadow_estimator.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
def estimate(
    self,
    circuit: QuantumCircuit,
    observables: list[Observable],
    target_precision: float | None = None,
    save_manifest: bool = True,
) -> EstimationResult:
    """
    Estimate observables using classical shadows.

    Workflow:
    1. Generate shadow measurement circuits
    2. Transpile and execute on backend
    3. Reconstruct shadow snapshots
    4. Estimate all observables
    5. Generate provenance manifest

    Args:
        circuit: State-preparation circuit to measure.
        observables: Observables to estimate.
        target_precision: If given, the shadow size is grown to the largest
            size any single observable requires for this precision.
        save_manifest: Whether to write a provenance manifest to disk.

    Returns:
        EstimationResult with per-observable estimates and data paths.

    Raises:
        ValueError: If shadow-size estimation is non-positive, or the shadow
            implementation did not record measurement bases.
        RuntimeError: If collected outcomes do not match the shadow size.
    """
    experiment_id = str(uuid.uuid4())
    start_time = time.time()

    # Determine shadow size: with a precision target, size to the
    # worst-case observable; otherwise use the configured size.
    if target_precision:
        required_sizes = [
            self.shadow_impl.estimate_shadow_size_needed(obs, target_precision)
            for obs in observables
        ]
        shadow_size = max(required_sizes) if required_sizes else self.shadow_config.shadow_size
        if shadow_size <= 0:
            raise ValueError("Shadow size estimation produced a non-positive value")
        self.shadow_config.shadow_size = shadow_size
        self.shadow_impl.config.shadow_size = shadow_size
    else:
        shadow_size = self.shadow_config.shadow_size
        self.shadow_impl.config.shadow_size = shadow_size

    # Generate shadow measurement circuits
    shadow_circuits = self.shadow_impl.generate_measurement_circuits(circuit, shadow_size)

    # Calibrate measurement error mitigation if required
    if isinstance(self.shadow_impl, NoiseAwareRandomLocalCliffordShadows):
        mem_params = self.mitigation_config.parameters
        mem_shots = int(mem_params.get("mem_shots", 4096))
        mem_qubits_param = mem_params.get("mem_qubits")
        if mem_qubits_param is None:
            mem_qubits = list(range(circuit.num_qubits))
        elif isinstance(mem_qubits_param, (list, tuple)):
            mem_qubits = [int(q) for q in mem_qubits_param]
        else:
            mem_qubits = [int(mem_qubits_param)]

        mem_force = bool(mem_params.get("mem_force_calibration", False))
        run_options = mem_params.get("mem_run_options", {})
        mem_confusion_path_str = self.mitigation_config.confusion_matrix_path

        # Prefer a previously persisted confusion matrix unless the caller
        # forced recalibration; a missing file falls through to calibration.
        if mem_confusion_path_str and not mem_force:
            try:
                self.shadow_impl.mem.load_confusion_matrix(mem_confusion_path_str)
                metadata = self.shadow_impl.mem.get_confusion_metadata()
                if isinstance(metadata.get("shots_per_state"), (int, float)):
                    mem_shots = int(metadata["shots_per_state"])
                    mem_params["mem_shots"] = mem_shots
                if isinstance(metadata.get("qubits"), (list, tuple)):
                    mem_qubits = [int(q) for q in metadata["qubits"]]
                    mem_params["mem_qubits"] = mem_qubits
            except FileNotFoundError:
                LOGGER.warning(
                    "Configured confusion matrix %s not found; recalibrating.",
                    mem_confusion_path_str,
                )
                mem_confusion_path_str = None

        if (
            self.shadow_impl.mem.confusion_matrix is None
            or mem_force
            or not mem_confusion_path_str
        ):
            mem_dir = self.data_dir / "mem"
            mem_dir.mkdir(parents=True, exist_ok=True)
            confusion_matrix_path = mem_dir / f"{experiment_id}.npz"
            saved_confusion_path = self.shadow_impl.mem.calibrate(
                mem_qubits,
                shots=mem_shots,
                run_options=run_options,
                output_path=confusion_matrix_path,
            )
            # calibrate() may choose its own output location; fall back to
            # the path we requested when it reports none.
            mem_confusion_path = (
                saved_confusion_path
                if saved_confusion_path is not None
                else confusion_matrix_path
            )
            self.mitigation_config.confusion_matrix_path = str(mem_confusion_path.resolve())
            mem_confusion_path_str = self.mitigation_config.confusion_matrix_path
            self.shadow_impl.mem.confusion_matrix_path = Path(mem_confusion_path_str)
        else:
            self.mitigation_config.confusion_matrix_path = mem_confusion_path_str

        # Record effective MEM settings so the manifest reflects reality.
        if "MEM" not in self.mitigation_config.techniques:
            self.mitigation_config.techniques.append("MEM")
        mem_params["mem_qubits"] = mem_qubits
        mem_params["mem_shots"] = mem_shots

    # Transpile for backend
    transpiled_circuits = transpile(shadow_circuits, backend=self.backend)

    # Respect backend batching limits
    max_experiments = None
    backend_config = None
    if hasattr(self.backend, "configuration"):
        try:
            backend_config = self.backend.configuration()
        except Exception:
            backend_config = None

    if backend_config is not None:
        max_experiments = getattr(backend_config, "max_experiments", None)

    if isinstance(max_experiments, np.integer):
        max_experiments = int(max_experiments)

    if not isinstance(max_experiments, int) or max_experiments <= 0:
        # Use safe default batch size for IBM backends to avoid submission failures.
        # Emit via LOGGER for consistency with the rest of this module.
        max_experiments = 500
        LOGGER.warning(
            "Backend max_experiments unavailable or invalid. "
            "Using safe default batch size: %d",
            max_experiments,
        )

    def _single_shot_outcomes(counts) -> np.ndarray:
        # shots=1 yields exactly one bitstring per circuit. Qiskit orders
        # bits little-endian (qubit 0 rightmost), so reverse into qubit order.
        bitstring = next(iter(counts)).replace(" ", "")
        return np.array([int(b) for b in bitstring[::-1]], dtype=int)

    measurement_outcomes_list: list[np.ndarray] = []

    sampler = self._get_runtime_sampler()

    for start_idx in range(0, len(transpiled_circuits), max_experiments):
        circuit_batch = transpiled_circuits[start_idx : start_idx + max_experiments]
        if sampler is not None:
            # Runtime sampler path: indexable per-circuit results.
            job = sampler.run(list(circuit_batch), shots=1)
            result = job.result()

            for batch_idx in range(len(circuit_batch)):
                counts = result[batch_idx].data.meas.get_counts()
                measurement_outcomes_list.append(_single_shot_outcomes(counts))
        else:
            job = self.backend.run(circuit_batch, shots=1)  # Each circuit is one shadow
            result = job.result()

            for batch_idx in range(len(circuit_batch)):
                counts = result.get_counts(batch_idx)
                measurement_outcomes_list.append(_single_shot_outcomes(counts))

    if len(measurement_outcomes_list) != shadow_size:
        raise RuntimeError(
            "Collected measurement outcomes do not match the requested shadow size."
        )

    measurement_outcomes = np.asarray(measurement_outcomes_list, dtype=int)

    measurement_bases = self.shadow_impl.measurement_bases
    if measurement_bases is None:
        raise ValueError("Shadow implementation did not record measurement bases.")
    measurement_bases = np.asarray(measurement_bases, dtype=int)
    self.shadow_impl.measurement_bases = measurement_bases

    # Save shot data to Parquet
    shot_data_path = self.shot_data_writer.save_shadow_measurements(
        experiment_id=experiment_id,
        measurement_bases=measurement_bases,
        measurement_outcomes=measurement_outcomes,
        num_qubits=circuit.num_qubits,
    )

    # Reconstruct shadows
    self.shadow_impl.reconstruct_classical_shadow(measurement_outcomes, measurement_bases)

    # Estimate all observables
    estimates: dict[str, dict[str, object]] = {}
    for obs in observables:
        estimate = self.shadow_impl.estimate_observable(obs)
        estimates[str(obs)] = {
            "expectation_value": estimate.expectation_value,
            "variance": estimate.variance,
            "ci_95": estimate.confidence_interval,
            "ci_width": estimate.ci_width,
        }

    execution_time = time.time() - start_time

    # Create provenance manifest
    if save_manifest:
        manifest = self._create_manifest(
            experiment_id,
            circuit,
            observables,
            estimates,
            shadow_size,
            execution_time,
            shot_data_path,
        )
        manifest_path = self.data_dir / "manifests" / f"{experiment_id}.json"
        manifest_path.parent.mkdir(parents=True, exist_ok=True)
        manifest.to_json(manifest_path)
    else:
        manifest_path = None

    return EstimationResult(
        observables=estimates,
        shots_used=shadow_size,
        execution_time=execution_time,
        backend_name=self.backend.name,
        experiment_id=experiment_id,
        manifest_path=str(manifest_path) if manifest_path else None,
        shot_data_path=str(shot_data_path),
        mitigation_confusion_matrix_path=self.mitigation_config.confusion_matrix_path,
    )

estimate_shots_needed(observables, target_precision)

Estimate shadow size needed for target precision.

Source code in src/quartumse/estimator/shadow_estimator.py
368
369
370
371
372
373
374
375
376
def estimate_shots_needed(self, observables: list[Observable], target_precision: float) -> int:
    """Estimate shadow size needed for target precision."""
    # Use worst-case observable
    max_shadow_size = 0
    for obs in observables:
        size = self.shadow_impl.estimate_shadow_size_needed(obs, target_precision)
        max_shadow_size = max(max_shadow_size, size)

    return max_shadow_size

replay_from_manifest(manifest_path, observables=None)

Replay an experiment from a saved manifest and shot data.

This allows re-estimation of observables from previously collected shot data without re-executing circuits on the backend.

Parameters:

Name Type Description Default
manifest_path str | Path

Path to the provenance manifest JSON file

required
observables list[Observable] | None

Optional new list of observables to estimate. If None, uses observables from the original manifest.

None

Returns:

Type Description
EstimationResult

EstimationResult with re-estimated observables

Source code in src/quartumse/estimator/shadow_estimator.py
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
def replay_from_manifest(
    self,
    manifest_path: str | Path,
    observables: list[Observable] | None = None,
) -> EstimationResult:
    """
    Replay an experiment from a saved manifest and shot data.

    This allows re-estimation of observables from previously collected shot data
    without re-executing circuits on the backend.

    Args:
        manifest_path: Path to the provenance manifest JSON file
        observables: Optional new list of observables to estimate. If None,
                    uses observables from the original manifest.

    Returns:
        EstimationResult with re-estimated observables

    Raises:
        FileNotFoundError: If the manifest, or (for noise-aware replay) the
            persisted confusion matrix, cannot be located.
        ValueError: If the manifest lacks a shadows configuration, or the
            confusion-matrix archive is missing its dataset.
        NotImplementedError: For shadow versions without replay support.
    """
    manifest_path = Path(manifest_path)
    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest not found: {manifest_path}")

    # Load manifest
    manifest = ProvenanceManifest.from_json(manifest_path)
    experiment_id = manifest.schema.experiment_id

    # Load shot data
    measurement_bases, measurement_outcomes, num_qubits = (
        self.shot_data_writer.load_shadow_measurements(experiment_id)
    )

    if manifest.schema.shadows is None:
        raise ValueError(
            "Manifest does not contain classical shadows configuration information."
        )

    # Reconstruct shadows with loaded data
    # Create temporary shadow implementation if needed
    # Carry the original run's random seed into the replay configuration.
    shadow_payload = manifest.schema.shadows.model_dump()
    shadow_payload["random_seed"] = manifest.schema.random_seed
    shadow_config = ShadowConfig.model_validate(shadow_payload)

    resolved_confusion_matrix_path: str | None = (
        manifest.schema.mitigation.confusion_matrix_path
    )

    if shadow_config.version == ShadowVersion.V0_BASELINE:
        shadow_impl = RandomLocalCliffordShadows(shadow_config)
    elif shadow_config.version == ShadowVersion.V1_NOISE_AWARE:
        # Noise-aware replay needs the calibration artifact that was used
        # during the original run.
        confusion_matrix_path_str = manifest.schema.mitigation.confusion_matrix_path

        if not confusion_matrix_path_str:
            raise FileNotFoundError(
                "Noise-aware manifest does not include a persisted confusion matrix path. "
                "Re-run estimation or provide the saved calibration artifact before replaying."
            )

        # Search order: the recorded path as-is; if relative, resolved
        # against the manifest directory and the data directory; finally
        # the "mem" subdirectories of both, matched by filename only.
        raw_confusion_path = Path(confusion_matrix_path_str)
        candidate_paths = [raw_confusion_path]

        if not raw_confusion_path.is_absolute():
            candidate_paths.append((manifest_path.parent / raw_confusion_path).resolve())
            candidate_paths.append((self.data_dir / raw_confusion_path).resolve())

        candidate_paths.append((self.data_dir / "mem" / raw_confusion_path.name).resolve())
        candidate_paths.append(
            (manifest_path.parent / "mem" / raw_confusion_path.name).resolve()
        )

        # First existing candidate wins.
        confusion_matrix_path: Path | None = None
        for candidate in candidate_paths:
            if candidate and candidate.exists():
                confusion_matrix_path = candidate
                break

        if confusion_matrix_path is None:
            raise FileNotFoundError(
                "Unable to locate the persisted confusion matrix required for noise-aware replay. "
                f"Looked for {raw_confusion_path} and related paths."
            )

        with np.load(confusion_matrix_path, allow_pickle=False) as archive:
            if "confusion_matrix" not in archive:
                raise ValueError(
                    "Confusion matrix archive is missing the 'confusion_matrix' dataset."
                )
            confusion_matrix = archive["confusion_matrix"]

        # Rebuild the mitigation object from the persisted matrix instead
        # of recalibrating on hardware.
        mem = MeasurementErrorMitigation(self.backend)
        mem.confusion_matrix = confusion_matrix
        mem.confusion_matrix_path = confusion_matrix_path.resolve()
        # NOTE(review): sets a private attribute and assumes every qubit in
        # the shot data was calibrated — confirm against the original run.
        mem._calibrated_qubits = tuple(range(num_qubits))

        shadow_impl = NoiseAwareRandomLocalCliffordShadows(shadow_config, mem)
        resolved_confusion_matrix_path = str(confusion_matrix_path.resolve())
    else:
        raise NotImplementedError(
            f"Replay for shadow version {shadow_config.version.value} is not implemented"
        )
    # Re-attach the recorded bases and rebuild snapshots from stored shots.
    shadow_impl.measurement_bases = measurement_bases
    shadow_impl.reconstruct_classical_shadow(measurement_outcomes, measurement_bases)

    # Use observables from manifest if not provided
    if observables is None:
        observables = [
            Observable(obs_dict["pauli"], obs_dict.get("coefficient", 1.0))
            for obs_dict in manifest.schema.observables
        ]

    # Estimate all observables
    estimates: dict[str, dict[str, object]] = {}
    for obs in observables:
        estimate = shadow_impl.estimate_observable(obs)
        estimates[str(obs)] = {
            "expectation_value": estimate.expectation_value,
            "variance": estimate.variance,
            "ci_95": estimate.confidence_interval,
            "ci_width": estimate.ci_width,
        }

    return EstimationResult(
        observables=estimates,
        shots_used=manifest.schema.shadows.shadow_size,
        execution_time=0.0,  # No execution time for replay
        backend_name=manifest.schema.backend.backend_name,
        experiment_id=experiment_id,
        manifest_path=str(manifest_path),
        shot_data_path=manifest.schema.shot_data_path,
        mitigation_confusion_matrix_path=resolved_confusion_matrix_path,
    )

SuiteType

Bases: Enum

Category of observable suite.

Source code in src/quartumse/observables/suites.py
39
40
41
42
43
44
45
46
class SuiteType(Enum):
    """Category of observable suite.

    Each category marks how a suite is intended to be used in benchmarking,
    per the per-member comments below.
    """

    WORKLOAD = "workload"  # Task-aligned (what practitioners measure)
    STRESS = "stress"  # Large sets for scaling tests
    POSTHOC = "posthoc"  # Library for post-hoc querying tests
    COMMUTING = "commuting"  # All-commuting baseline (grouped measurement advantage)
    DIAGNOSTIC = "diagnostic"  # System diagnostics (readout, crosstalk)

SummaryRow

Bases: BaseModel

Summary statistics for (protocol, circuit, N) combination (§10.3).

This schema defines aggregated metrics across observables and replicates.

Source code in src/quartumse/io/schemas.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
class SummaryRow(BaseModel):
    """Summary statistics for (protocol, circuit, N) combination (§10.3).

    This schema defines aggregated metrics across observables and replicates.
    Fields defaulting to None are optional sections: error statistics apply
    only when ground truth is available, coverage only when confidence
    intervals were computed, and resource totals when timing was recorded.
    """

    # === Identifiers ===
    run_id: str
    circuit_id: str
    protocol_id: str
    N_total: int
    noise_profile_id: str = "ideal"

    # === Counts ===
    n_observables: int
    n_replicates: int

    # === SE statistics (standard error across the set) ===
    se_mean: float
    se_median: float
    se_p90: float
    se_p95: float
    se_max: float

    # === Error statistics (if truth available) ===
    abs_err_mean: float | None = None
    abs_err_median: float | None = None
    abs_err_p90: float | None = None
    abs_err_p95: float | None = None
    abs_err_max: float | None = None
    rmse: float | None = None

    # === Attainment ===
    attainment_epsilon: float | None = None  # The epsilon used
    attainment_fraction: float | None = None  # Fraction with SE <= epsilon

    # === Coverage (if CIs computed) ===
    coverage_per_observable: float | None = None  # Mean per-obs coverage
    coverage_family_wise: float | None = None  # Family-wise coverage

    # === Resource totals ===
    total_quantum_time_s: float | None = None
    total_classical_time_s: float | None = None

SweepConfig dataclass

Configuration for a benchmark sweep.

Attributes:

Name Type Description
run_id str

Unique identifier for this run.

methodology_version str

Version of the methodology.

protocols list[Protocol]

List of protocol instances to evaluate.

circuits list[tuple[str, Any]]

List of (circuit_id, circuit) tuples.

observable_sets list[tuple[str, ObservableSet]]

List of (obs_set_id, ObservableSet) tuples.

n_grid list[int]

Shot budget grid.

n_replicates int

Number of replicates per configuration.

noise_profiles list[str]

List of noise profile IDs.

seeds dict[str, int]

Seed configuration.

tasks list[str]

List of task IDs to run.

Source code in src/quartumse/tasks/sweep.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
@dataclass
class SweepConfig:
    """Configuration for a benchmark sweep.

    Attributes:
        run_id: Unique identifier for this run (random hex suffix by default).
        methodology_version: Version of the methodology.
        protocols: List of protocol instances to evaluate.
        circuits: List of (circuit_id, circuit) tuples.
        observable_sets: List of (obs_set_id, ObservableSet) tuples.
        n_grid: Shot budget grid.
        n_replicates: Number of replicates per configuration.
        noise_profiles: List of noise profile IDs.
        seeds: Seed configuration.
        seed_policy: Policy name for deriving per-replicate seeds
            (default "base_replicate_config").
        tasks: List of task IDs to run.
        store_raw_shots: Whether raw shot records are retained.
        timeout_per_protocol_s: Optional per-protocol timeout in seconds.
        hw_timing_profile: Optional hardware timing profile forwarded to
            protocol execution.
    """

    run_id: str = field(default_factory=lambda: f"run_{uuid4().hex[:12]}")
    methodology_version: str = "3.0.0"
    protocols: list[Protocol] = field(default_factory=list)
    circuits: list[tuple[str, Any]] = field(default_factory=list)
    observable_sets: list[tuple[str, ObservableSet]] = field(default_factory=list)
    n_grid: list[int] = field(default_factory=lambda: [100, 500, 1000, 5000, 10000])
    n_replicates: int = 10
    noise_profiles: list[str] = field(default_factory=lambda: ["ideal"])
    seeds: dict[str, int] = field(default_factory=lambda: {"base": 42})
    seed_policy: str = "base_replicate_config"
    tasks: list[str] = field(default_factory=list)
    store_raw_shots: bool = True
    timeout_per_protocol_s: float | None = None
    hw_timing_profile: Any | None = None

SweepOrchestrator

Orchestrator for running benchmark sweeps.

Manages the execution of protocols across the sweep grid and collects results.

Source code in src/quartumse/tasks/sweep.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
class SweepOrchestrator:
    """Orchestrator for running benchmark sweeps.

    Manages the execution of protocols across the sweep grid
    (protocol x circuit x observable set x shot budget x noise profile
    x replicate) and collects results in long form.
    """

    def __init__(
        self,
        config: SweepConfig,
        executor: Callable | None = None,
    ) -> None:
        """Initialize sweep orchestrator.

        Args:
            config: Sweep configuration.
            executor: Optional custom executor for running protocols.
                Defaults to the simulator-backed ``_default_executor``.
        """
        self.config = config
        self.executor = executor or self._default_executor
        # Accumulates one LongFormRow per (observable, configuration) result.
        self.results = LongFormResultSet()
        # Replaced with a fresh instance at the start of each run().
        self.progress = SweepProgress()
        # Flat raw-shot records, filled only when config.store_raw_shots is set.
        self.raw_shot_records: list[dict] = []

    def _default_executor(
        self,
        protocol: Protocol,
        circuit: Any,
        observable_set: ObservableSet,
        n_shots: int,
        seed: int,
        noise_profile: str,
    ) -> Estimates:
        """Default protocol executor using physical measurement paths.

        Args:
            protocol: Protocol to execute.
            circuit: State-preparation circuit; when None, an empty circuit
                over the observable set's qubits is substituted.
            observable_set: Observables to estimate.
            n_shots: Total shot budget for this run.
            seed: Protocol-level random seed.
            noise_profile: Profile id resolved to a sampler by _resolve_backend.

        Returns:
            Estimates produced by the protocol run.
        """
        from qiskit import QuantumCircuit

        backend = self._resolve_backend(noise_profile)
        if circuit is None:
            circuit = QuantumCircuit(observable_set.n_qubits)

        return protocol.run(
            circuit=circuit,
            observable_set=observable_set,
            total_budget=n_shots,
            backend=backend,
            seed=seed,
            timeout_s=self.config.timeout_per_protocol_s,
            hw_timing_profile=self.config.hw_timing_profile,
        )

    def _resolve_backend(self, noise_profile: str) -> Any:
        """Resolve a backend sampler from a noise profile identifier."""
        from ..backends.sampler import IdealSampler, NoisySampler
        from ..noise.profiles import NoiseType, get_profile

        if noise_profile == "ideal":
            return IdealSampler()

        try:
            profile = get_profile(noise_profile)
        except KeyError:
            # Unknown profile id: pass it through and let the sampler decide.
            return NoisySampler(noise_profile_id=noise_profile)

        if profile.noise_type == NoiseType.READOUT_BITFLIP:
            return NoisySampler(
                noise_profile_id=profile.profile_id,
                readout_error=profile.parameters.get("p", 0.0),
            )
        if profile.noise_type == NoiseType.DEPOLARIZING:
            return NoisySampler(
                noise_profile_id=profile.profile_id,
                depol_1q=profile.parameters.get("p1", 0.0),
                depol_2q=profile.parameters.get("p2", 0.0),
            )

        # Known profile with an unhandled noise type: default sampler settings.
        return NoisySampler(noise_profile_id=profile.profile_id)

    def compute_total_configs(self) -> int:
        """Compute total number of configurations.

        Size of the Cartesian product of every sweep dimension.
        """
        n_protocols = len(self.config.protocols)
        n_circuits = len(self.config.circuits)
        n_obs_sets = len(self.config.observable_sets)
        n_budgets = len(self.config.n_grid)
        n_replicates = self.config.n_replicates
        n_noise = len(self.config.noise_profiles)

        return n_protocols * n_circuits * n_obs_sets * n_budgets * n_replicates * n_noise

    def generate_seeds(self, replicate_id: int, config_id: int) -> dict[str, int]:
        """Generate reproducible seeds for a configuration.

        Seeds are derived deterministically from the base seed, replicate
        number, and configuration index, so an identical sweep replays with
        identical randomness.

        Args:
            replicate_id: Replicate number.
            config_id: Configuration index.

        Returns:
            Dict with seed_policy, seed_protocol, seed_acquire, seed_bootstrap.
        """
        base = self.config.seeds.get("base", 42)
        # NOTE(review): base + replicate_id * 1000 + config_id can collide for
        # different (replicate_id, config_id) pairs once config_id >= 1000 —
        # confirm the grid stays below that, or widen the multiplier.
        rng = np.random.default_rng(base + replicate_id * 1000 + config_id)

        # Draw order is fixed: protocol, acquire, bootstrap.
        return {
            "seed_policy": self.config.seed_policy,
            "seed_protocol": int(rng.integers(0, 2**31)),
            "seed_acquire": int(rng.integers(0, 2**31)),
            "seed_bootstrap": int(rng.integers(0, 2**31)),
        }

    def run(
        self,
        progress_callback: Callable[[SweepProgress], None] | None = None,
    ) -> LongFormResultSet:
        """Run the benchmark sweep.

        Iterates the full grid; per-configuration failures are recorded in
        ``progress.errors`` rather than aborting the sweep.

        Args:
            progress_callback: Optional callback for progress updates.

        Returns:
            LongFormResultSet with all results.
        """
        self.progress = SweepProgress(
            total_configs=self.compute_total_configs(),
            start_time=datetime.now(),
        )

        config_id = 0

        for protocol in self.config.protocols:
            self.progress.current_protocol = protocol.protocol_id

            for circuit_id, circuit in self.config.circuits:
                self.progress.current_circuit = circuit_id

                for obs_set_id, observable_set in self.config.observable_sets:
                    for n in self.config.n_grid:
                        self.progress.current_n = n

                        for noise_profile in self.config.noise_profiles:
                            for rep in range(self.config.n_replicates):
                                self.progress.current_replicate = rep

                                try:
                                    seeds = self.generate_seeds(rep, config_id)

                                    estimates = self.executor(
                                        protocol=protocol,
                                        circuit=circuit,
                                        observable_set=observable_set,
                                        n_shots=n,
                                        seed=seeds["seed_protocol"],
                                        noise_profile=noise_profile,
                                    )

                                    # Convert to LongFormRows
                                    rows = self._estimates_to_rows(
                                        estimates=estimates,
                                        circuit_id=circuit_id,
                                        obs_set_id=obs_set_id,
                                        observable_set=observable_set,
                                        n=n,
                                        replicate_id=rep,
                                        noise_profile=noise_profile,
                                        seeds=seeds,
                                    )
                                    self.results.add_many(rows)

                                    # Collect raw shot data
                                    if self.config.store_raw_shots and estimates.raw_chunks:
                                        self._collect_raw_shots(
                                            estimates=estimates,
                                            circuit_id=circuit_id,
                                            n=n,
                                            replicate_id=rep,
                                            noise_profile=noise_profile,
                                        )

                                except Exception as e:
                                    # Record the failure and keep sweeping; one bad
                                    # configuration must not kill the whole run.
                                    self.progress.errors.append(
                                        {
                                            "protocol": protocol.protocol_id,
                                            "circuit": circuit_id,
                                            "n": n,
                                            "replicate": rep,
                                            "error": str(e),
                                        }
                                    )

                                # Advance even on failure so seed derivation stays
                                # aligned with the grid position.
                                self.progress.completed_configs += 1
                                config_id += 1

                                if progress_callback:
                                    progress_callback(self.progress)

        return self.results

    def _estimates_to_rows(
        self,
        estimates: Estimates,
        circuit_id: str,
        obs_set_id: str,
        observable_set: ObservableSet,
        n: int,
        replicate_id: int,
        noise_profile: str,
        seeds: dict[str, int],
    ) -> list[LongFormRow]:
        """Convert Estimates to LongFormRows.

        Args:
            estimates: Protocol output to flatten.
            circuit_id: Identifier of the prepared circuit.
            obs_set_id: Identifier of the observable set.
            observable_set: Set used to look up per-observable metadata.
            n: Total shot budget for this configuration.
            replicate_id: Replicate number.
            noise_profile: Noise profile identifier.
            seeds: Seed dict produced by generate_seeds().

        Returns:
            One LongFormRow per estimate in ``estimates``.
        """
        rows = []
        builder = LongFormResultBuilder()

        for est in estimates.estimates:
            obs = observable_set.get_by_id(est.observable_id)

            row = (
                builder.reset()
                .with_run_id(self.config.run_id)
                .with_methodology_version(self.config.methodology_version)
                .with_circuit(circuit_id, n_qubits=observable_set.n_qubits)
                .with_observable(
                    observable_id=est.observable_id,
                    observable_type=obs.observable_type.value,
                    locality=obs.locality,
                    coefficient=obs.coefficient,
                    observable_set_id=obs_set_id,
                    group_id=obs.group_id,
                    M_total=len(observable_set),
                )
                .with_protocol(estimates.protocol_id, estimates.protocol_version)
                .with_backend("simulator", noise_profile_id=noise_profile)
                .with_replicate(replicate_id)
                .with_seeds(
                    seed_policy=seeds["seed_policy"],
                    seed_protocol=seeds["seed_protocol"],
                    seed_acquire=seeds["seed_acquire"],
                    seed_bootstrap=seeds.get("seed_bootstrap"),
                )
                .with_budget(N_total=n, n_settings=est.n_settings)
                .with_estimate(
                    estimate=est.estimate,
                    se=est.se,
                    ci_low=est.ci.ci_low if est.ci else None,
                    ci_high=est.ci.ci_high if est.ci else None,
                )
                .with_timing_breakdown(
                    timing=estimates.timing_breakdown,
                    timed_out=estimates.timed_out,
                    n_shots_completed=estimates.n_shots_completed,
                )
                .build()
            )
            rows.append(row)

        return rows

    def _collect_raw_shots(
        self,
        estimates: Estimates,
        circuit_id: str,
        n: int,
        replicate_id: int,
        noise_profile: str,
    ) -> None:
        """Extract raw shot data from estimates and append to raw_shot_records.

        Handles two chunk layouts: dict-of-bitstrings keyed by setting, and
        array-based outcomes with optional per-shot setting indices.
        """
        protocol_id = estimates.protocol_id or ""
        for chunk in estimates.raw_chunks:
            if chunk.bitstrings:
                # Check metadata for measurement bases (shadows protocol)
                meta_bases = chunk.metadata.get("measurement_bases") if chunk.metadata else None
                for setting_id, bitstring_list in chunk.bitstrings.items():
                    # NOTE(review): meta_bases is assumed array-like (has
                    # .tolist()) — confirm against shadows protocol output.
                    self.raw_shot_records.append(
                        {
                            "protocol_id": protocol_id,
                            "circuit_id": circuit_id,
                            "N_total": n,
                            "replicate_id": replicate_id,
                            "noise_profile": noise_profile,
                            "setting_id": setting_id,
                            "bitstrings": json.dumps(bitstring_list),
                            "measurement_bases": json.dumps(meta_bases.tolist())
                            if meta_bases is not None
                            else None,
                        }
                    )
            elif chunk.outcomes is not None:
                # Array-based format (e.g., shadows)
                unique_settings = (
                    np.unique(chunk.setting_indices)
                    if chunk.setting_indices is not None
                    else [0]
                )
                for si in unique_settings:
                    if chunk.setting_indices is not None:
                        mask = chunk.setting_indices == si
                        outcomes = chunk.outcomes[mask]
                        bases = chunk.basis_choices[mask] if chunk.basis_choices is not None else None
                    else:
                        outcomes = chunk.outcomes
                        bases = chunk.basis_choices

                    self.raw_shot_records.append(
                        {
                            "protocol_id": protocol_id,
                            "circuit_id": circuit_id,
                            "N_total": n,
                            "replicate_id": replicate_id,
                            "noise_profile": noise_profile,
                            "setting_id": str(si),
                            "bitstrings": json.dumps(outcomes.tolist()),
                            "measurement_bases": json.dumps(bases.tolist())
                            if bases is not None
                            else None,
                        }
                    )

    def create_manifest(self) -> RunManifest:
        """Create run manifest for this sweep.

        Captures configuration, environment, and completion status so the
        run can be audited and reproduced.
        """
        return RunManifest(
            run_id=self.config.run_id,
            methodology_version=self.config.methodology_version,
            created_at=self.progress.start_time,
            git_commit_hash=get_git_commit_hash(),
            quartumse_version=get_quartumse_version(),
            python_version=get_python_version(),
            environment_lock=get_environment_lock(),
            # circuits/observable_sets are (id, object) pairs; record ids only.
            circuits=[c[0] for c in self.config.circuits],
            observable_sets=[o[0] for o in self.config.observable_sets],
            protocols=[p.protocol_id for p in self.config.protocols],
            N_grid=self.config.n_grid,
            n_replicates=self.config.n_replicates,
            noise_profiles=self.config.noise_profiles,
            # Any recorded per-configuration error downgrades the run status.
            status="completed" if not self.progress.errors else "partial_success",
            completed_at=datetime.now(),
            config={
                "seeds": self.config.seeds,
                "seed_policy": self.config.seed_policy,
                "tasks": self.config.tasks,
            },
        )

__init__(config, executor=None)

Initialize sweep orchestrator.

Parameters:

Name Type Description Default
config SweepConfig

Sweep configuration.

required
executor Callable | None

Optional custom executor for running protocols.

None
Source code in src/quartumse/tasks/sweep.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def __init__(
    self,
    config: SweepConfig,
    executor: Callable | None = None,
) -> None:
    """Initialize sweep orchestrator.

    Args:
        config: Sweep configuration.
        executor: Optional custom executor for running protocols.
            Defaults to the simulator-backed ``_default_executor``.
    """
    self.config = config
    self.executor = executor or self._default_executor
    # Accumulates one LongFormRow per (observable, configuration) result.
    self.results = LongFormResultSet()
    # Replaced with a fresh instance at the start of each run().
    self.progress = SweepProgress()
    # Flat raw-shot records, filled only when config.store_raw_shots is set.
    self.raw_shot_records: list[dict] = []

compute_total_configs()

Compute total number of configurations.

Source code in src/quartumse/tasks/sweep.py
177
178
179
180
181
182
183
184
185
186
def compute_total_configs(self) -> int:
    """Compute total number of configurations.

    The total is the size of the Cartesian product of every sweep
    dimension: protocols x circuits x observable sets x shot budgets
    x replicates x noise profiles.
    """
    cfg = self.config
    dimension_sizes = (
        len(cfg.protocols),
        len(cfg.circuits),
        len(cfg.observable_sets),
        len(cfg.n_grid),
        cfg.n_replicates,
        len(cfg.noise_profiles),
    )
    total = 1
    for size in dimension_sizes:
        total *= size
    return total

create_manifest()

Create run manifest for this sweep.

Source code in src/quartumse/tasks/sweep.py
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
def create_manifest(self) -> RunManifest:
    """Create run manifest for this sweep.

    Captures configuration, environment, and completion status so the
    run can be audited and reproduced.
    """
    return RunManifest(
        run_id=self.config.run_id,
        methodology_version=self.config.methodology_version,
        created_at=self.progress.start_time,
        git_commit_hash=get_git_commit_hash(),
        quartumse_version=get_quartumse_version(),
        python_version=get_python_version(),
        environment_lock=get_environment_lock(),
        # circuits/observable_sets are (id, object) pairs; record ids only.
        circuits=[c[0] for c in self.config.circuits],
        observable_sets=[o[0] for o in self.config.observable_sets],
        protocols=[p.protocol_id for p in self.config.protocols],
        N_grid=self.config.n_grid,
        n_replicates=self.config.n_replicates,
        noise_profiles=self.config.noise_profiles,
        # Any recorded per-configuration error downgrades the run status.
        status="completed" if not self.progress.errors else "partial_success",
        completed_at=datetime.now(),
        config={
            "seeds": self.config.seeds,
            "seed_policy": self.config.seed_policy,
            "tasks": self.config.tasks,
        },
    )

generate_seeds(replicate_id, config_id)

Generate reproducible seeds for a configuration.

Parameters:

Name Type Description Default
replicate_id int

Replicate number.

required
config_id int

Configuration index.

required

Returns:

Type Description
dict[str, int]

Dict with seed_policy, seed_protocol, seed_acquire, seed_bootstrap.

Source code in src/quartumse/tasks/sweep.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def generate_seeds(self, replicate_id: int, config_id: int) -> dict[str, int]:
    """Generate reproducible seeds for a configuration.

    Seeds are derived deterministically from the base seed, replicate
    number, and configuration index, so an identical sweep replays with
    identical randomness.

    Args:
        replicate_id: Replicate number.
        config_id: Configuration index.

    Returns:
        Dict with seed_policy, seed_protocol, seed_acquire, seed_bootstrap.
    """
    base_seed = self.config.seeds.get("base", 42)
    rng = np.random.default_rng(base_seed + replicate_id * 1000 + config_id)

    seeds = {"seed_policy": self.config.seed_policy}
    # Draw order is fixed for reproducibility: protocol, acquire, bootstrap.
    for seed_name in ("seed_protocol", "seed_acquire", "seed_bootstrap"):
        seeds[seed_name] = int(rng.integers(0, 2**31))
    return seeds

run(progress_callback=None)

Run the benchmark sweep.

Parameters:

Name Type Description Default
progress_callback Callable[[SweepProgress], None] | None

Optional callback for progress updates.

None

Returns:

Type Description
LongFormResultSet

LongFormResultSet with all results.

Source code in src/quartumse/tasks/sweep.py
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
def run(
    self,
    progress_callback: Callable[[SweepProgress], None] | None = None,
) -> LongFormResultSet:
    """Run the benchmark sweep.

    Iterates the full grid (protocol x circuit x observable set x shot
    budget x noise profile x replicate); per-configuration failures are
    recorded in ``progress.errors`` rather than aborting the sweep.

    Args:
        progress_callback: Optional callback for progress updates.

    Returns:
        LongFormResultSet with all results.
    """
    self.progress = SweepProgress(
        total_configs=self.compute_total_configs(),
        start_time=datetime.now(),
    )

    config_id = 0

    for protocol in self.config.protocols:
        self.progress.current_protocol = protocol.protocol_id

        for circuit_id, circuit in self.config.circuits:
            self.progress.current_circuit = circuit_id

            for obs_set_id, observable_set in self.config.observable_sets:
                for n in self.config.n_grid:
                    self.progress.current_n = n

                    for noise_profile in self.config.noise_profiles:
                        for rep in range(self.config.n_replicates):
                            self.progress.current_replicate = rep

                            try:
                                seeds = self.generate_seeds(rep, config_id)

                                estimates = self.executor(
                                    protocol=protocol,
                                    circuit=circuit,
                                    observable_set=observable_set,
                                    n_shots=n,
                                    seed=seeds["seed_protocol"],
                                    noise_profile=noise_profile,
                                )

                                # Convert to LongFormRows
                                rows = self._estimates_to_rows(
                                    estimates=estimates,
                                    circuit_id=circuit_id,
                                    obs_set_id=obs_set_id,
                                    observable_set=observable_set,
                                    n=n,
                                    replicate_id=rep,
                                    noise_profile=noise_profile,
                                    seeds=seeds,
                                )
                                self.results.add_many(rows)

                                # Collect raw shot data
                                if self.config.store_raw_shots and estimates.raw_chunks:
                                    self._collect_raw_shots(
                                        estimates=estimates,
                                        circuit_id=circuit_id,
                                        n=n,
                                        replicate_id=rep,
                                        noise_profile=noise_profile,
                                    )

                            except Exception as e:
                                # Record the failure and keep sweeping; one bad
                                # configuration must not kill the whole run.
                                self.progress.errors.append(
                                    {
                                        "protocol": protocol.protocol_id,
                                        "circuit": circuit_id,
                                        "n": n,
                                        "replicate": rep,
                                        "error": str(e),
                                    }
                                )

                            # Advance even on failure so seed derivation stays
                            # aligned with the grid position.
                            self.progress.completed_configs += 1
                            config_id += 1

                            if progress_callback:
                                progress_callback(self.progress)

    return self.results

TaskConfig dataclass

Configuration for a benchmark task.

Attributes:

Name Type Description
task_id str

Unique identifier for the task.

task_type TaskType

Type of task.

epsilon float

Target precision (for precision-based tasks).

delta float

Global failure probability.

criterion_type CriterionType

Type of criterion to use.

fwer_method str

FWER control method.

n_grid list[int]

Shot budget grid to evaluate.

n_replicates int

Number of repetitions per configuration.

baseline_protocol_id str

Baseline protocol for SSF computation.

additional_params dict[str, Any]

Task-specific additional parameters.

Source code in src/quartumse/tasks/base.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
@dataclass
class TaskConfig:
    """Configuration for a benchmark task.

    Attributes:
        task_id: Unique identifier for the task.
        task_type: Type of task.
        epsilon: Target precision (for precision-based tasks).
        delta: Global failure probability.
        criterion_type: Type of criterion to use.
        fwer_method: FWER control method.
        n_grid: Shot budget grid to evaluate.
        n_replicates: Number of repetitions per configuration.
        baseline_protocol_id: Baseline protocol for SSF computation.
        additional_params: Task-specific additional parameters.
    """

    task_id: str
    task_type: TaskType
    epsilon: float = 0.01  # target precision (precision-based tasks)
    delta: float = 0.05  # global failure probability
    criterion_type: CriterionType = CriterionType.CI_BASED
    fwer_method: str = "bonferroni"  # family-wise error rate control method
    n_grid: list[int] = field(default_factory=lambda: [100, 500, 1000, 5000, 10000])
    n_replicates: int = 10
    baseline_protocol_id: str = "direct_grouped"  # baseline for SSF computation
    additional_params: dict[str, Any] = field(default_factory=dict)

TaskType

Bases: str, Enum

Types of benchmark tasks.

Source code in src/quartumse/tasks/base.py
21
22
23
24
25
26
27
28
29
30
31
class TaskType(str, Enum):
    """Types of benchmark tasks.

    Inherits from str, so members compare equal to (and serialize as)
    their plain string values.
    """

    WORST_CASE = "worst_case"  # Task 1
    AVERAGE_TARGET = "average_target"  # Task 2
    FIXED_BUDGET = "fixed_budget"  # Task 3
    DOMINANCE = "dominance"  # Task 4
    PILOT_SELECTION = "pilot_selection"  # Task 5
    BIAS_VARIANCE = "bias_variance"  # Task 6
    NOISE_SENSITIVITY = "noise_sensitivity"  # Task 7
    ADAPTIVE_EFFICIENCY = "adaptive_efficiency"  # Task 8

analyze_by_locality(long_form_results, n_total=None, locality_map=None)

Analyze protocol performance grouped by observable locality.

Parameters:

Name Type Description Default
long_form_results list[LongFormRow]

Long-form benchmark results

required
n_total int | None

Specific shot budget to analyze (None = use max)

None
locality_map dict[str, int] | None

Map observable_id -> locality (auto-extracted if None)

None

Returns:

Type Description
dict[str, PropertyAnalysis]

Dict mapping protocol_id to PropertyAnalysis

Source code in src/quartumse/analysis/observable_properties.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def analyze_by_locality(
    long_form_results: list[LongFormRow],
    n_total: int | None = None,
    locality_map: dict[str, int] | None = None,
) -> dict[str, PropertyAnalysis]:
    """Analyze protocol performance grouped by observable locality.

    Args:
        long_form_results: Long-form benchmark results
        n_total: Specific shot budget to analyze (None = use max)
        locality_map: Map observable_id -> locality (auto-extracted if None)

    Returns:
        Dict mapping protocol_id to PropertyAnalysis
    """
    if not long_form_results:
        return {}

    # Fall back to localities recorded in the rows themselves.
    if locality_map is None:
        locality_map = _extract_locality_from_rows(long_form_results)

    # Bucket every row under its protocol.
    protocol_rows: dict[str, list[LongFormRow]] = defaultdict(list)
    for record in long_form_results:
        protocol_rows[record.protocol_id].append(record)

    # Default to the largest budget present in the data.
    if n_total is None:
        n_total = max(record.N_total for record in long_form_results)

    analyses: dict[str, PropertyAnalysis] = {}

    for proto_id, records in protocol_rows.items():
        # Restrict to the budget under analysis; skip protocols without data there.
        at_budget = [r for r in records if r.N_total == n_total]
        if not at_budget:
            continue

        # Bucket the remaining rows by observable locality (unknown -> 0).
        loc_buckets: dict[int, list[LongFormRow]] = defaultdict(list)
        for record in at_budget:
            loc_buckets[locality_map.get(record.observable_id, 0)].append(record)

        groups: dict[int, LocalityGroup] = {}
        loc_points: list[int] = []
        se_points: list[float] = []

        for loc in sorted(loc_buckets):
            members = loc_buckets[loc]
            ses = [r.se for r in members]
            ids = list({r.observable_id for r in members})

            groups[loc] = LocalityGroup(
                locality=loc,
                n_observables=len(ids),
                observable_ids=ids,
                mean_se=float(np.mean(ses)),
                median_se=float(np.median(ses)),
                std_se=float(np.std(ses)) if len(ses) > 1 else 0.0,
                min_se=float(np.min(ses)),
                max_se=float(np.max(ses)),
                theoretical_variance_factor=3**loc,
            )

            # Accumulate (locality, SE) pairs for the cross-group fit.
            loc_points.extend(loc for _ in ses)
            se_points.extend(ses)

        # Correlation and least-squares fit need at least two points
        # spanning more than one locality value.
        if len(loc_points) > 1 and len(set(loc_points)) > 1:
            correlation = float(np.corrcoef(loc_points, se_points)[0, 1])

            # Ordinary least squares: SE = intercept + slope * locality.
            X = np.array(loc_points)
            y = np.array(se_points)
            X_mean = np.mean(X)
            y_mean = np.mean(y)
            b = np.sum((X - X_mean) * (y - y_mean)) / np.sum((X - X_mean) ** 2)
            a = y_mean - b * X_mean

            regression = {
                "intercept": float(a),
                "slope": float(b),
                "r_squared": correlation**2,
            }
        else:
            correlation = 0.0
            regression = {"intercept": 0.0, "slope": 0.0, "r_squared": 0.0}

        analyses[proto_id] = PropertyAnalysis(
            protocol_id=proto_id,
            n_total=n_total,
            by_locality=groups,
            locality_correlation=correlation,
            locality_regression=regression,
        )

    return analyses

bootstrap_ci(data, statistic=np.mean, confidence=0.95, n_bootstrap=10000, seed=42)

Compute bootstrap confidence interval.

Parameters:

Name Type Description Default
data ndarray | list[float]

Sample data

required
statistic Callable[[ndarray], float]

Function to compute statistic (default: mean)

mean
confidence float

Confidence level

0.95
n_bootstrap int

Number of bootstrap resamples

10000
seed int | None

Random seed

42

Returns:

Type Description
BootstrapCI

BootstrapCI with estimate and bounds

Source code in src/quartumse/analysis/statistical_tests.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def bootstrap_ci(
    data: np.ndarray | list[float],
    statistic: Callable[[np.ndarray], float] = np.mean,
    confidence: float = 0.95,
    n_bootstrap: int = 10000,
    seed: int | None = 42,
) -> BootstrapCI:
    """Compute bootstrap confidence interval.

    Resamples the data with replacement ``n_bootstrap`` times, applies
    ``statistic`` to each resample, and takes symmetric percentiles of
    the resulting distribution as the interval bounds.

    Args:
        data: Sample data
        statistic: Function to compute statistic (default: mean)
        confidence: Confidence level
        n_bootstrap: Number of bootstrap resamples
        seed: Random seed

    Returns:
        BootstrapCI with estimate and bounds
    """
    sample = np.asarray(data)
    rng = np.random.default_rng(seed)

    # Point estimate on the full sample.
    point_estimate = float(statistic(sample))

    # Statistic over each with-replacement resample of the original size.
    size = len(sample)
    bootstrap_stats = np.zeros(n_bootstrap)
    for idx in range(n_bootstrap):
        resampled = rng.choice(sample, size=size, replace=True)
        bootstrap_stats[idx] = statistic(resampled)

    # Percentile-method interval bounds.
    alpha = 1 - confidence
    lower = float(np.percentile(bootstrap_stats, 100 * alpha / 2))
    upper = float(np.percentile(bootstrap_stats, 100 * (1 - alpha / 2)))

    return BootstrapCI(
        estimate=point_estimate,
        ci_low=lower,
        ci_high=upper,
        confidence=confidence,
        n_bootstrap=n_bootstrap,
        bootstrap_std=float(np.std(bootstrap_stats)),
    )

bootstrap_hypothesis_test(data_a, data_b, statistic=np.mean, alternative='two-sided', n_bootstrap=10000, alpha=0.05, seed=42)

Bootstrap hypothesis test for difference between groups.

Tests H0: statistic(A) = statistic(B)

Parameters:

Name Type Description Default
data_a ndarray | list[float]

Sample from group A

required
data_b ndarray | list[float]

Sample from group B

required
statistic Callable[[ndarray], float]

Function to compute statistic

mean
alternative str

"two-sided", "less", or "greater"

'two-sided'
n_bootstrap int

Number of bootstrap resamples

10000
alpha float

Significance level

0.05
seed int | None

Random seed

42

Returns:

Type Description
HypothesisTestResult

HypothesisTestResult

Source code in src/quartumse/analysis/statistical_tests.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
def bootstrap_hypothesis_test(
    data_a: np.ndarray | list[float],
    data_b: np.ndarray | list[float],
    statistic: Callable[[np.ndarray], float] = np.mean,
    alternative: str = "two-sided",
    n_bootstrap: int = 10000,
    alpha: float = 0.05,
    seed: int | None = 42,
) -> HypothesisTestResult:
    """Bootstrap hypothesis test for a difference between two groups.

    Tests H0: statistic(A) = statistic(B) against a permutation null
    distribution, and additionally builds an independent-resample
    bootstrap CI for the observed difference.

    Args:
        data_a: Sample from group A.
        data_b: Sample from group B.
        statistic: Function mapping an array to a scalar statistic.
        alternative: "two-sided", "less", or "greater".
        n_bootstrap: Number of permutation/bootstrap resamples.
        alpha: Significance level.
        seed: Random seed (None for nondeterministic).

    Returns:
        HypothesisTestResult with p-value, effect size, and CI.

    Raises:
        ValueError: If `alternative` is not a supported option.
    """
    sample_a = np.asarray(data_a)
    sample_b = np.asarray(data_b)
    rng = np.random.default_rng(seed)

    # Observed difference in the statistic between the two groups.
    observed_diff = statistic(sample_a) - statistic(sample_b)

    n_a = len(sample_a)
    n_b = len(sample_b)
    # Under H0 the group labels are exchangeable, so pool everything.
    pooled = np.concatenate([sample_a, sample_b])

    # Null distribution via label permutation.
    null_diffs = np.empty(n_bootstrap)
    for idx in range(n_bootstrap):
        shuffled = rng.permutation(pooled)
        null_diffs[idx] = statistic(shuffled[:n_a]) - statistic(shuffled[n_a:])

    # One- or two-sided p-value from the permutation null.
    if alternative == "two-sided":
        p_value = float(np.mean(np.abs(null_diffs) >= np.abs(observed_diff)))
    elif alternative == "less":
        p_value = float(np.mean(null_diffs <= observed_diff))
    elif alternative == "greater":
        p_value = float(np.mean(null_diffs >= observed_diff))
    else:
        raise ValueError(f"Unknown alternative: {alternative}")

    # Cohen's d style effect size (population-variance approximation).
    pooled_std = np.sqrt((np.var(sample_a) + np.var(sample_b)) / 2)
    effect_size = observed_diff / pooled_std if pooled_std > 0 else 0.0

    # Independent-resample bootstrap CI for the difference itself.
    diff_bootstrap = np.empty(n_bootstrap)
    for idx in range(n_bootstrap):
        draw_a = rng.choice(sample_a, size=n_a, replace=True)
        draw_b = rng.choice(sample_b, size=n_b, replace=True)
        diff_bootstrap[idx] = statistic(draw_a) - statistic(draw_b)

    ci = BootstrapCI(
        estimate=float(observed_diff),
        ci_low=float(np.percentile(diff_bootstrap, 100 * alpha / 2)),
        ci_high=float(np.percentile(diff_bootstrap, 100 * (1 - alpha / 2))),
        confidence=1 - alpha,
        n_bootstrap=n_bootstrap,
        bootstrap_std=float(np.std(diff_bootstrap)),
    )

    return HypothesisTestResult(
        statistic=float(observed_diff),
        p_value=p_value,
        effect_size=float(effect_size),
        ci=ci,
        reject_null=p_value < alpha,
        alpha=alpha,
        test_name="bootstrap_permutation",
    )

compare_protocols_statistically(results_a, results_b, n_total, epsilon=0.01, alpha=0.05, n_bootstrap=10000, seed=42)

Complete statistical comparison between two protocols.

Parameters:

Name Type Description Default
results_a list

Long-form results from protocol A

required
results_b list

Long-form results from protocol B

required
n_total int

Shot budget to compare at

required
epsilon float

Target precision for SSF

0.01
alpha float

Significance level

0.05
n_bootstrap int

Number of bootstrap samples

10000
seed int

Random seed

42

Returns:

Type Description
StatisticalComparison

StatisticalComparison with all tests

Source code in src/quartumse/analysis/statistical_tests.py
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
def compare_protocols_statistically(
    results_a: list,  # LongFormRow
    results_b: list,  # LongFormRow
    n_total: int,
    epsilon: float = 0.01,
    alpha: float = 0.05,
    n_bootstrap: int = 10000,
    seed: int = 42,
) -> StatisticalComparison:
    """Run the full statistical comparison between two protocols.

    Combines a permutation-bootstrap test on mean SE, a K-S test on the
    SE distributions, and a bootstrap CI for the shot-savings factor.

    Args:
        results_a: Long-form rows from protocol A.
        results_b: Long-form rows from protocol B.
        n_total: Shot budget at which to compare.
        epsilon: Target precision used for the SSF.
        alpha: Significance level.
        n_bootstrap: Number of bootstrap samples.
        seed: Random seed.

    Returns:
        StatisticalComparison bundling all three tests.
    """
    # Restrict both result sets to the requested shot budget.
    se_a = np.array([row.se for row in results_a if row.N_total == n_total])
    se_b = np.array([row.se for row in results_b if row.N_total == n_total])

    # Protocol labels come from the first row (fallback for empty inputs).
    protocol_a = results_a[0].protocol_id if results_a else "unknown"
    protocol_b = results_b[0].protocol_id if results_b else "unknown"

    # Mean-SE difference via the permutation bootstrap.
    diff_test = bootstrap_hypothesis_test(
        se_a,
        se_b,
        statistic=np.mean,
        n_bootstrap=n_bootstrap,
        alpha=alpha,
        seed=seed,
    )

    # Distribution-level difference between the SE samples.
    ks_result = ks_test_protocols(se_a, se_b, alpha=alpha)

    # Shot-savings factor with a bootstrap CI.
    ssf_ci = bootstrap_ssf(se_a, se_b, epsilon, n_bootstrap, 1 - alpha, seed)

    return StatisticalComparison(
        protocol_a=protocol_a,
        protocol_b=protocol_b,
        n_total=n_total,
        metric="mean_se",
        difference_test=diff_test,
        ks_test=ks_result,
        ssf_ci=ssf_ci,
    )

compute_cost_normalized_metrics(long_form_results, cost_model=None, truth_values=None)

Compute cost-normalized metrics for all protocols.

Parameters:

Name Type Description Default
long_form_results list[LongFormRow]

Long-form benchmark results

required
cost_model CostModel | None

Cost model to use (default: CostModel())

None
truth_values dict[str, float] | None

Ground truth for error computation

None

Returns:

Name Type Description
Dict dict[str, dict[int, CostNormalizedResult]]

protocol_id -> {N -> CostNormalizedResult}

Source code in src/quartumse/analysis/cost_normalized.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def compute_cost_normalized_metrics(
    long_form_results: list[LongFormRow],
    cost_model: CostModel | None = None,
    truth_values: dict[str, float] | None = None,
) -> dict[str, dict[int, CostNormalizedResult]]:
    """Compute cost-normalized metrics for every protocol in the results.

    Args:
        long_form_results: Long-form benchmark rows.
        cost_model: Cost model to apply (a default CostModel is used if None).
        truth_values: Optional ground truth per observable_id for error metrics.

    Returns:
        Mapping protocol_id -> {N_total -> CostNormalizedResult}.
    """
    model = cost_model if cost_model is not None else CostModel()

    # Bucket rows first by protocol, then by total shot count.
    grouped: dict[str, dict[int, list[LongFormRow]]] = defaultdict(lambda: defaultdict(list))
    for row in long_form_results:
        grouped[row.protocol_id][row.N_total].append(row)

    def _mean_of(values: list) -> float:
        """Mean of a value list, or 0.0 when nothing is available."""
        return float(np.mean(values)) if values else 0.0

    output: dict[str, dict[int, CostNormalizedResult]] = {}

    for protocol_id, per_n in grouped.items():
        output[protocol_id] = {}

        for n, rows in per_n.items():
            se_values = [row.se for row in rows]
            mean_se = float(np.mean(se_values))

            # Circuit metrics are optional per-row fields; the truthy check
            # also skips rows where the field is 0/None (matching upstream).
            mean_depth = _mean_of(
                [row.circuit_depth for row in rows
                 if hasattr(row, "circuit_depth") and row.circuit_depth]
            )
            mean_gates = _mean_of(
                [row.twoq_gate_count for row in rows
                 if hasattr(row, "twoq_gate_count") and row.twoq_gate_count]
            )
            mean_time = _mean_of(
                [row.time_classical_s for row in rows
                 if hasattr(row, "time_classical_s") and row.time_classical_s]
            )

            cost = model.compute_cost(
                n_shots=n,
                circuit_depth=int(mean_depth),
                twoq_gates=int(mean_gates),
                classical_time_s=mean_time,
            )

            # SE scales as 1/sqrt(shots), so scaling by sqrt(cost)/sqrt(N)
            # expresses precision per unit of resource actually spent.
            scale = np.sqrt(cost) / np.sqrt(n) if n > 0 else None
            cost_normalized_se = mean_se * scale if scale is not None else float("inf")

            # Cost-normalized absolute error, when ground truth is known.
            cost_normalized_error = None
            if truth_values:
                abs_errors = [
                    abs(row.estimate - truth_values[row.observable_id])
                    for row in rows
                    if row.observable_id in truth_values
                ]
                if abs_errors:
                    mean_error = float(np.mean(abs_errors))
                    cost_normalized_error = (
                        mean_error * scale if scale is not None else float("inf")
                    )

            output[protocol_id][n] = CostNormalizedResult(
                protocol_id=protocol_id,
                n_total=n,
                raw_metrics={
                    "mean_se": mean_se,
                    "median_se": float(np.median(se_values)),
                    "max_se": float(np.max(se_values)),
                },
                cost=cost,
                cost_normalized_se=cost_normalized_se,
                cost_normalized_error=cost_normalized_error,
                circuit_depth=mean_depth,
                twoq_gates=mean_gates,
            )

    return output

compute_objective_metrics(long_form_results, weights, objective_type='weighted_sum', true_objective=None, target_epsilon=0.01, n_bootstrap=1000, seed=42)

Compute objective-level metrics from benchmark results.

Parameters:

Name Type Description Default
long_form_results list

List of LongFormRow from benchmark

required
weights dict[str, float]

Dict mapping observable_id -> coefficient

required
objective_type str

"weighted_sum" or "qaoa_cost"

'weighted_sum'
true_objective float | None

Ground truth objective value (if known)

None
target_epsilon float

Target error for N* computation

0.01
n_bootstrap int

Bootstrap samples for CI

1000
seed int

Random seed

42

Returns:

Type Description
ObjectiveAnalysis

ObjectiveAnalysis with per-protocol metrics

Source code in src/quartumse/analysis/objective_metrics.py
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def compute_objective_metrics(
    long_form_results: list,
    weights: dict[str, float],
    objective_type: str = "weighted_sum",
    true_objective: float | None = None,
    target_epsilon: float = 0.01,
    n_bootstrap: int = 1000,
    seed: int = 42,
) -> ObjectiveAnalysis:
    """Compute objective-level metrics from benchmark results.

    Aggregates per-observable estimates into a weighted objective per
    replicate, then summarizes across replicates for every (protocol, N)
    pair: point estimate, bootstrap CI, error vs truth, N*, and the
    winning protocol at the largest shot budget.

    Args:
        long_form_results: List of LongFormRow from benchmark.
        weights: Mapping observable_id -> coefficient.
        objective_type: "weighted_sum" or "qaoa_cost".
        true_objective: Ground truth objective value (if known).
        target_epsilon: Target SE for N* computation.
        n_bootstrap: Bootstrap samples for the CI.
        seed: Random seed.

    Returns:
        ObjectiveAnalysis with per-protocol metrics.
    """
    # Group per-observable estimates by (protocol, N, replicate):
    # each group maps observable_id -> estimate for one replicate.
    by_protocol_n_rep: dict = {}
    for row in long_form_results:
        key = (row.protocol_id, row.N_total, row.replicate_id)
        by_protocol_n_rep.setdefault(key, {})[row.observable_id] = row.estimate

    protocols = sorted({row.protocol_id for row in long_form_results})
    shot_counts = sorted({row.N_total for row in long_form_results})

    analysis = ObjectiveAnalysis(
        objective_type=objective_type,
        target_epsilon=target_epsilon,
    )

    # Robustness fix: with no input rows the original crashed on
    # max(shot_counts); return an empty but well-formed analysis instead.
    if not shot_counts:
        return analysis

    for protocol in protocols:
        analysis.estimates_by_protocol[protocol] = {}

        for n_shots in shot_counts:
            # Gather replicate-level estimate dicts for this (protocol, N).
            replicate_estimates = [
                obs_dict
                for (p, n, _rep), obs_dict in by_protocol_n_rep.items()
                if p == protocol and n == n_shots
            ]
            if not replicate_estimates:
                continue

            # Objective value per replicate, then mean across replicates.
            objective_values = [
                compute_weighted_objective(rep, weights, objective_type)
                for rep in replicate_estimates
            ]
            estimate = float(np.mean(objective_values))

            # Bootstrap SE / CI over replicates.
            se, ci_low, ci_high = bootstrap_objective_ci(
                replicate_estimates,
                weights,
                objective_type,
                n_bootstrap=n_bootstrap,
                seed=seed,
            )

            # Absolute / relative error if ground truth is available.
            abs_error = None
            rel_error = None
            if true_objective is not None:
                abs_error = abs(estimate - true_objective)
                # Guard against division by (near-)zero truth values.
                if abs(true_objective) > 1e-10:
                    rel_error = abs_error / abs(true_objective)

            analysis.estimates_by_protocol[protocol][n_shots] = ObjectiveEstimate(
                estimate=estimate,
                se=se,
                ci_low=ci_low,
                ci_high=ci_high,
                true_value=true_objective,
                abs_error=abs_error,
                rel_error=rel_error,
                n_observables=len(weights),
                n_shots=n_shots,
                protocol_id=protocol,
            )

        # N*: smallest budget meeting the target SE (shot_counts is ascending).
        n_star = None
        for n_shots in shot_counts:
            est = analysis.estimates_by_protocol[protocol].get(n_shots)
            if est is not None and est.se is not None and est.se <= target_epsilon:
                n_star = n_shots
                break
        analysis.n_star_objective[protocol] = n_star

    # Winner = protocol with the smallest SE at the largest budget.
    max_n = max(shot_counts)
    best_protocol = None
    best_se = float("inf")
    for protocol in protocols:
        est = analysis.estimates_by_protocol[protocol].get(max_n)
        if est is not None and est.se is not None and est.se < best_se:
            best_se = est.se
            best_protocol = protocol

    analysis.winner_at_max_n = best_protocol or ""

    # Shadows-vs-baseline SE ratio at max N (heuristic protocol-name match).
    shadows_se = None
    baseline_se = None
    for protocol in protocols:
        est = analysis.estimates_by_protocol[protocol].get(max_n)
        if est is None:
            continue
        if "shadows" in protocol.lower():
            shadows_se = est.se
        elif "grouped" in protocol.lower() or "direct" in protocol.lower():
            baseline_se = est.se

    if shadows_se is not None and baseline_se is not None and baseline_se > 0:
        analysis.objective_ratio = shadows_se / baseline_se

    return analysis

construct_ci(data=None, estimate=None, se=None, method=CIMethodType.NORMAL, confidence_level=0.95, n_bootstrap=1000, seed=None)

Construct confidence interval using specified method.

For NORMAL method, provide estimate and se. For BOOTSTRAP methods, provide data array.

Parameters:

Name Type Description Default
data NDArray[floating] | None

Array of observations (for bootstrap methods).

None
estimate float | None

Point estimate (for normal method).

None
se float | None

Standard error (for normal method).

None
method CIMethodType | str

CI construction method.

NORMAL
confidence_level float

Confidence level (default 0.95).

0.95
n_bootstrap int

Number of bootstrap samples.

1000
seed int | None

Random seed for reproducibility.

None

Returns:

Type Description
ConfidenceInterval

ConfidenceInterval result.

Raises:

Type Description
ValueError

If required parameters are missing.

Source code in src/quartumse/stats/confidence.py
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
def construct_ci(
    data: NDArray[np.floating] | None = None,
    estimate: float | None = None,
    se: float | None = None,
    method: CIMethodType | str = CIMethodType.NORMAL,
    confidence_level: float = 0.95,
    n_bootstrap: int = 1000,
    seed: int | None = None,
) -> ConfidenceInterval:
    """Construct confidence interval using specified method.

    For NORMAL method, provide estimate and se.
    For BOOTSTRAP methods, provide data array.

    Args:
        data: Array of observations (for bootstrap methods).
        estimate: Point estimate (for normal method).
        se: Standard error (for normal method).
        method: CI construction method.
        confidence_level: Confidence level (default 0.95).
        n_bootstrap: Number of bootstrap samples.
        seed: Random seed for reproducibility.

    Returns:
        ConfidenceInterval result.

    Raises:
        ValueError: If required parameters are missing.
    """
    if isinstance(method, str):
        method = CIMethodType(method)

    if method == CIMethodType.NORMAL:
        if estimate is None or se is None:
            raise ValueError("Normal CI requires estimate and se")
        n_samples = len(data) if data is not None else 0
        return normal_ci(estimate, se, confidence_level, n_samples)

    elif method == CIMethodType.BOOTSTRAP_PERCENTILE:
        if data is None:
            raise ValueError("Bootstrap CI requires data array")
        return bootstrap_percentile_ci(data, None, confidence_level, n_bootstrap, seed)

    elif method == CIMethodType.BOOTSTRAP_BCA:
        if data is None:
            raise ValueError("Bootstrap CI requires data array")
        return bootstrap_bca_ci(data, None, confidence_level, n_bootstrap, seed)

    else:
        raise ValueError(f"Unknown CI method: {method}")

construct_simultaneous_cis(estimates, standard_errors, alpha=0.05, fwer_method=FWERMethod.BONFERRONI, ci_method=CIMethodType.NORMAL)

Construct simultaneous CIs with FWER control.

Each CI is constructed at the FWER-adjusted confidence level to ensure family-wise coverage.

Parameters:

Name Type Description Default
estimates list[float]

List of point estimates.

required
standard_errors list[float]

List of standard errors.

required
alpha float

Global significance level.

0.05
fwer_method FWERMethod | str

Method for FWER control.

BONFERRONI
ci_method CIMethodType | str

Method for individual CI construction.

NORMAL

Returns:

Type Description
SimultaneousCIs

SimultaneousCIs with family-wise coverage guarantee.

Source code in src/quartumse/stats/fwer.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
def construct_simultaneous_cis(
    estimates: list[float],
    standard_errors: list[float],
    alpha: float = 0.05,
    fwer_method: FWERMethod | str = FWERMethod.BONFERRONI,
    ci_method: CIMethodType | str = CIMethodType.NORMAL,
) -> SimultaneousCIs:
    """Construct simultaneous CIs with FWER control.

    Each CI is constructed at the FWER-adjusted confidence level
    to ensure family-wise coverage.

    Args:
        estimates: List of point estimates.
        standard_errors: List of standard errors.
        alpha: Global significance level.
        fwer_method: Method for FWER control.
        ci_method: Method for individual CI construction.

    Returns:
        SimultaneousCIs with family-wise coverage guarantee.
    """
    M = len(estimates)
    if len(standard_errors) != M:
        raise ValueError("Must have same number of estimates and SEs")

    # Get FWER adjustment
    adjustment = compute_fwer_adjustment(M, alpha, fwer_method)

    # Construct individual CIs at adjusted confidence level
    intervals = []
    for i, (est, se) in enumerate(zip(estimates, standard_errors, strict=False)):
        ci = construct_ci(
            estimate=est,
            se=se,
            method=ci_method,
            confidence_level=adjustment.confidence_individual[i],
        )
        intervals.append(ci)

    return SimultaneousCIs(
        intervals=intervals,
        fwer_adjustment=adjustment,
        coverage_guarantee=adjustment.effective_confidence,
    )

fit_power_law(ns, se_values, initial_exponent=-0.5)

Fit power law SE = a * N^b to data.

Parameters:

Name Type Description Default
ns list[int]

Shot budgets

required
se_values list[float]

Corresponding SE values

required
initial_exponent float

Initial guess for exponent (default -0.5)

-0.5

Returns:

Type Description
PowerLawFit

PowerLawFit with fitted parameters

Source code in src/quartumse/analysis/interpolation.py
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def fit_power_law(
    ns: list[int],
    se_values: list[float],
    initial_exponent: float = -0.5,
) -> PowerLawFit:
    """Fit the power law SE = a * N^b to observed (N, SE) pairs.

    Args:
        ns: Shot budgets.
        se_values: SE observed at each budget.
        initial_exponent: Starting guess for the exponent b (default -0.5).

    Returns:
        PowerLawFit with fitted amplitude/exponent and goodness of fit.
    """
    budgets = np.array(ns, dtype=float)
    errors = np.array(se_values, dtype=float)

    # Keep only points usable in the fit (positive and finite).
    usable = (budgets > 0) & (errors > 0) & np.isfinite(errors)
    budgets = budgets[usable]
    errors = errors[usable]

    if len(budgets) < 2:
        # Too few points to constrain a two-parameter fit.
        return PowerLawFit(
            amplitude=np.nan,
            exponent=np.nan,
            r_squared=0.0,
            n_points=len(budgets),
            ns=list(budgets.astype(int)),
            observed=list(errors),
            predicted=[],
        )

    def _r2(pred):
        """Coefficient of determination against the observed SEs."""
        ss_res = np.sum((errors - pred) ** 2)
        ss_tot = np.sum((errors - np.mean(errors)) ** 2)
        return 1 - (ss_res / ss_tot) if ss_tot > 0 else 0.0

    # Seed the amplitude from the first point assuming b = -0.5.
    a_init = errors[0] * np.sqrt(budgets[0])

    try:
        (a, b), _ = curve_fit(
            _power_law,
            budgets,
            errors,
            p0=[a_init, initial_exponent],
            bounds=([0, -2], [np.inf, 0]),
            maxfev=5000,
        )
    except (RuntimeError, ValueError):
        # Fallback: pin b = -0.5 and fit only the amplitude
        # (SE = a / sqrt(N)  =>  a = SE * sqrt(N)).
        a = np.mean(errors * np.sqrt(budgets))
        b = -0.5

    predicted = _power_law(budgets, a, b)
    r_squared = _r2(predicted)

    return PowerLawFit(
        amplitude=float(a),
        exponent=float(b),
        r_squared=float(r_squared),
        n_points=len(budgets),
        ns=list(budgets.astype(int)),
        observed=list(errors),
        predicted=list(predicted),
    )

format_objective_analysis(analysis)

Format objective analysis for display.

Source code in src/quartumse/analysis/objective_metrics.py
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
def format_objective_analysis(analysis: ObjectiveAnalysis) -> str:
    """Render an ObjectiveAnalysis as a human-readable text report.

    Args:
        analysis: Analysis produced by compute_objective_metrics.

    Returns:
        Multi-line report string.
    """
    lines = []
    lines.append(f"OBJECTIVE ANALYSIS ({analysis.objective_type})")
    lines.append("=" * 60)

    # Collect every shot budget present across all protocols.
    all_ns: set = set()
    for protocol_data in analysis.estimates_by_protocol.values():
        all_ns.update(protocol_data.keys())
    shot_counts = sorted(all_ns)
    max_n = max(shot_counts) if shot_counts else 0

    # Per-protocol summary at the largest budget.
    lines.append(f"\nAt N={max_n}:")
    for protocol, estimates in analysis.estimates_by_protocol.items():
        if max_n in estimates:
            est = estimates[max_n]
            short_name = protocol.replace("classical_shadows_v0", "shadows").replace("direct_", "")

            se_str = f"SE={est.se:.4f}" if est.se is not None else "SE=N/A"
            ci_str = ""
            if est.ci_low is not None and est.ci_high is not None:
                ci_str = f" 95%CI=[{est.ci_low:.4f}, {est.ci_high:.4f}]"

            err_str = ""
            if est.abs_error is not None:
                err_str = f" |err|={est.abs_error:.4f}"

            lines.append(f"  {short_name}: Ê={est.estimate:.4f} {se_str}{ci_str}{err_str}")

    # Shots-to-target per protocol.
    lines.append(f"\nN* for objective (SE <= {analysis.target_epsilon}):")
    for protocol, n_star in analysis.n_star_objective.items():
        short_name = protocol.replace("classical_shadows_v0", "shadows").replace("direct_", "")
        n_str = f"N*={n_star}" if n_star else f"N*>{max_n}"
        lines.append(f"  {short_name}: {n_str}")

    lines.append(f"\nWinner at max N: {analysis.winner_at_max_n}")
    # Bug fix: objective_ratio is only set by the producer when both a
    # shadows and a baseline protocol are present at max N; formatting
    # None with :.2f raised TypeError. Fall back to "N/A" instead.
    if analysis.objective_ratio is not None:
        lines.append(f"Ratio (shadows/baseline): {analysis.objective_ratio:.2f}x")
    else:
        lines.append("Ratio (shadows/baseline): N/A")

    return "\n".join(lines)

format_posthoc_result(result)

Format post-hoc benchmark result for display.

Source code in src/quartumse/analysis/posthoc_benchmark.py
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
def format_posthoc_result(result: PosthocBenchmarkResult) -> str:
    """Render a post-hoc querying benchmark result as a text report."""
    out: list[str] = []
    add = out.append

    add("POST-HOC QUERYING BENCHMARK")
    add("=" * 70)
    add(f"Library size: {result.library_size} observables")
    add(f"Query rounds: {result.n_rounds}")
    add(f"Observables per round: {result.observables_per_round}")
    add("")

    # Per-round shot breakdown for both strategies.
    add(f"{'Round':<8} {'Shadows Shots':>15} {'Direct Shots':>15} {'Obs Queried':>12}")
    add("-" * 55)

    shadows = result.shadows_costs
    direct = result.direct_costs

    if shadows and direct:
        for rnd in range(result.n_rounds):
            add(
                f"{rnd:<8} {shadows.shots_per_round[rnd]:>15,} {direct.shots_per_round[rnd]:>15,} "
                f"{shadows.observables_answered_per_round[rnd]:>12}"
            )

    add("-" * 55)
    add(f"{'TOTAL':<8} {result.shadows_total_shots:>15,} {result.direct_total_shots:>15,}")
    add("")

    # Headline savings summary.
    add(f"Shot savings factor: {result.shot_savings_factor:.1f}x")
    add(f"  (Direct uses {result.shot_savings_factor:.1f}x more shots than Shadows)")

    if result.breakeven_round is None:
        add("\nNo break-even: Direct is always cheaper (fully commuting observables?)")
    else:
        add("\nBreak-even point:")
        add(
            f"  Round {result.breakeven_round} ({result.breakeven_observables} observables)"
        )
        add("  After this, shadows has lower cumulative quantum cost")

    # Coverage table at fixed budgets, if available.
    if result.coverage_at_budgets:
        add("\nCOVERAGE AT FIXED SHOT BUDGETS:")
        add(
            f"{'Budget':>12} {'Shadows':>12} {'Direct':>12} {'Shadows %':>12} {'Direct %':>12}"
        )
        add("-" * 65)
        for cov in result.coverage_at_budgets:
            add(
                f"{cov.shot_budget:>12,} {cov.shadows_observables_covered:>12} "
                f"{cov.direct_observables_covered:>12} {cov.shadows_coverage_pct:>11.1f}% "
                f"{cov.direct_coverage_pct:>11.1f}%"
            )

    return "\n".join(out)

generate_all_k_local(n_qubits, k)

Generate ALL k-local Pauli strings on n qubits.

Total count: C(n,k) * 3^k

Parameters:

Name Type Description Default
n_qubits int

Number of qubits

required
k int

Locality (Pauli weight)

required

Returns:

Type Description
list[str]

List of Pauli strings

Source code in src/quartumse/observables/suites.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
def generate_all_k_local(n_qubits: int, k: int) -> list[str]:
    """Generate ALL k-local Pauli strings on n qubits.

    Total count: C(n,k) * 3^k

    Args:
        n_qubits: Number of qubits
        k: Locality (Pauli weight)

    Returns:
        List of Pauli strings
    """
    paulis = []
    pauli_ops = ["X", "Y", "Z"]

    for positions in combinations(range(n_qubits), k):
        for ops in product(pauli_ops, repeat=k):
            pauli_list = ["I"] * n_qubits
            for pos, op in zip(positions, ops, strict=False):
                pauli_list[pos] = op
            paulis.append("".join(pauli_list))

    return paulis

generate_observable_set(generator_id, n_qubits, n_observables, seed, **kwargs)

Convenience function to generate an ObservableSet.

Parameters:

Name Type Description Default
generator_id str

ID of the generator to use.

required
n_qubits int

Number of qubits.

required
n_observables int

Number of observables to generate.

required
seed int

Random seed for reproducibility.

required
**kwargs Any

Additional generator-specific parameters.

{}

Returns:

Type Description
ObservableSet

Generated ObservableSet.

Source code in src/quartumse/observables/generators.py
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
def generate_observable_set(
    generator_id: str,
    n_qubits: int,
    n_observables: int,
    seed: int,
    **kwargs: Any,
) -> ObservableSet:
    """Build an ObservableSet via a registered generator.

    Args:
        generator_id: Registry ID of the generator to use.
        n_qubits: Number of qubits.
        n_observables: How many observables to generate.
        seed: Random seed for reproducibility.
        **kwargs: Extra generator-specific options.

    Returns:
        The generated ObservableSet.
    """
    # Resolve the generator class first, then configure and run it.
    generator_cls = get_generator(generator_id)
    cfg = GeneratorConfig(
        n_qubits=n_qubits,
        n_observables=n_observables,
        seed=seed,
        extra=kwargs,
    )
    return generator_cls(cfg).generate()

get_profile(profile_id)

Get a noise profile by ID.

Parameters:

Name Type Description Default
profile_id str

Profile identifier.

required

Returns:

Type Description
NoiseProfile

NoiseProfile object.

Raises:

Type Description
KeyError

If profile not found.

Source code in src/quartumse/noise/profiles.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
def get_profile(profile_id: str) -> NoiseProfile:
    """Look up a canonical noise profile by its identifier.

    Args:
        profile_id: Profile identifier.

    Returns:
        NoiseProfile object.

    Raises:
        KeyError: If profile not found.
    """
    if profile_id in CANONICAL_PROFILES:
        return CANONICAL_PROFILES[profile_id]
    # Include the valid IDs in the message so the caller can self-correct.
    raise KeyError(
        f"Unknown noise profile: {profile_id}. " f"Available: {list(CANONICAL_PROFILES.keys())}"
    )

get_protocol(protocol_id)

Get a protocol class by ID from the global registry.

Source code in src/quartumse/protocols/registry.py
135
136
137
def get_protocol(protocol_id: str) -> type[Protocol]:
    """Resolve ``protocol_id`` to its protocol class via the global registry."""
    protocol_cls = _registry.get(protocol_id)
    return protocol_cls

interpolate_n_star(ns, se_values, epsilon, method='power_law')

Interpolate N* (shots-to-target) using power-law fit.

Instead of grid search, fits SE ∝ N^{-0.5} and solves for N* analytically.

Parameters:

Name Type Description Default
ns list[int]

Shot budgets

required
se_values list[float]

Corresponding SE values (mean or max)

required
epsilon float

Target SE threshold

required
method str

"power_law" or "linear" interpolation

'power_law'

Returns:

Type Description
tuple[float | None, PowerLawFit | None]

Tuple of (interpolated N*, PowerLawFit object or None)

Source code in src/quartumse/analysis/interpolation.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
def interpolate_n_star(
    ns: list[int],
    se_values: list[float],
    epsilon: float,
    method: str = "power_law",
) -> tuple[float | None, PowerLawFit | None]:
    """Interpolate N* (shots-to-target) using power-law fit.

    Instead of grid search, fits SE ∝ N^{-0.5} and solves for N* analytically.

    Args:
        ns: Shot budgets
        se_values: Corresponding SE values (mean or max)
        epsilon: Target SE threshold
        method: "power_law" or "linear" interpolation

    Returns:
        Tuple of (interpolated N*, PowerLawFit object or None)
    """
    if method == "power_law":
        fit = fit_power_law(ns, se_values)

        if np.isnan(fit.amplitude) or np.isnan(fit.exponent):
            return None, fit

        # Solve: epsilon = a * N^b => N = (epsilon / a)^(1/b)
        if fit.exponent >= 0:
            return None, fit

        n_star = (epsilon / fit.amplitude) ** (1 / fit.exponent)

        # Sanity check
        if n_star < 0 or not np.isfinite(n_star):
            return None, fit

        return float(n_star), fit

    elif method == "linear":
        # Linear interpolation between grid points
        ns_arr = np.array(ns)
        se_arr = np.array(se_values)

        # Sort by N
        order = np.argsort(ns_arr)
        ns_arr = ns_arr[order]
        se_arr = se_arr[order]

        # Find where SE crosses epsilon
        for i in range(len(ns_arr) - 1):
            if se_arr[i] > epsilon >= se_arr[i + 1]:
                # Handle case where consecutive SE values are equal (avoid division by zero)
                denom = se_arr[i] - se_arr[i + 1]
                if abs(denom) < 1e-12:
                    # SE values are effectively equal - return the lower N
                    n_star = float(ns_arr[i])
                else:
                    # Linear interpolation
                    frac = (se_arr[i] - epsilon) / denom
                    n_star = ns_arr[i] + frac * (ns_arr[i + 1] - ns_arr[i])
                return float(n_star), None

        # Check if already below threshold
        if se_arr[-1] <= epsilon:
            return float(ns_arr[-1]), None

        return None, None

    else:
        raise ValueError(f"Unknown method: {method}")

ks_test_protocols(se_a, se_b, alpha=0.05)

Kolmogorov-Smirnov test for distribution difference.

Tests whether SE distributions differ significantly.

Parameters:

Name Type Description Default
se_a ndarray | list[float]

SE values from protocol A

required
se_b ndarray | list[float]

SE values from protocol B

required
alpha float

Significance level

0.05

Returns:

Type Description
HypothesisTestResult

HypothesisTestResult

Source code in src/quartumse/analysis/statistical_tests.py
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
def ks_test_protocols(
    se_a: np.ndarray | list[float],
    se_b: np.ndarray | list[float],
    alpha: float = 0.05,
) -> HypothesisTestResult:
    """Kolmogorov-Smirnov two-sample test on protocol SE distributions.

    Tests whether SE distributions differ significantly.

    Args:
        se_a: SE values from protocol A
        se_b: SE values from protocol B
        alpha: Significance level

    Returns:
        HypothesisTestResult
    """
    arr_a = np.asarray(se_a)
    arr_b = np.asarray(se_b)

    # Two-sample K-S statistic and p-value.
    statistic, p_value = stats.ks_2samp(arr_a, arr_b)

    # Cohen's-d-style effect size: difference in means over pooled std
    # (0.0 when both samples are constant to avoid division by zero).
    pooled = np.sqrt((np.var(arr_a) + np.var(arr_b)) / 2)
    if pooled > 0:
        effect = (np.mean(arr_a) - np.mean(arr_b)) / pooled
    else:
        effect = 0.0

    return HypothesisTestResult(
        statistic=float(statistic),
        p_value=float(p_value),
        effect_size=float(effect),
        ci=None,
        reject_null=p_value < alpha,
        alpha=alpha,
        test_name="kolmogorov_smirnov",
    )

list_profiles()

List available noise profile IDs.

Source code in src/quartumse/noise/profiles.py
168
169
170
def list_profiles() -> list[str]:
    """Return the IDs of all canonical noise profiles."""
    return [profile_id for profile_id in CANONICAL_PROFILES]

list_protocols()

List all registered protocol IDs from the global registry.

Source code in src/quartumse/protocols/registry.py
140
141
142
def list_protocols() -> list[str]:
    """Return every protocol ID registered in the global registry."""
    protocol_ids = _registry.list_protocols()
    return protocol_ids

make_bell_suites(n_pairs, seed=42)

Create benchmark suites for Bell pair verification.

Bell pairs: ⊗_i (|00⟩ + |11⟩)_i / √2

Suites

workload_pair_correlations: XX, YY, ZZ on each Bell pair diagnostics_single_qubit: Single-qubit Z for readout diagnostics diagnostics_cross_pair: Cross-pair correlators (crosstalk detection) stress_random_1000: Random observables

Source code in src/quartumse/observables/suites.py
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
def make_bell_suites(n_pairs: int, seed: int = 42) -> dict[str, ObservableSuite]:
    """Create benchmark suites for Bell pair verification.

    Bell pairs: ⊗_i (|00⟩ + |11⟩)_i / √2

    Suites:
        workload_pair_correlations: XX, YY, ZZ on each Bell pair
        diagnostics_single_qubit: Single-qubit Z for readout diagnostics
        diagnostics_cross_pair: Cross-pair correlators (crosstalk detection)
        stress_random_1000: Random observables
    """
    n_qubits = 2 * n_pairs
    suites: dict[str, ObservableSuite] = {}

    def _two_local(q1: int, q2: int, p1: str, p2: str) -> str:
        # Identity everywhere except the two target qubits.
        chars = ["I"] * n_qubits
        chars[q1] = p1
        chars[q2] = p2
        return "".join(chars)

    # --- Workload: XX/YY/ZZ on each Bell pair ---
    pair_paulis = [
        _two_local(2 * k, 2 * k + 1, pp[0], pp[1])
        for k in range(n_pairs)
        for pp in ("XX", "YY", "ZZ")
    ]
    suites["workload_pair_correlations"] = ObservableSuite.from_pauli_strings(
        name="workload_pair_correlations",
        suite_type=SuiteType.WORKLOAD,
        pauli_strings=pair_paulis,
        description=f"XX, YY, ZZ on each of {n_pairs} Bell pairs",
    )

    # --- Diagnostics: per-qubit Z readout checks ---
    suites["diagnostics_single_qubit"] = ObservableSuite.from_pauli_strings(
        name="diagnostics_single_qubit",
        suite_type=SuiteType.DIAGNOSTIC,
        pauli_strings=generate_single_qubit(n_qubits, paulis="Z"),
        description="Single-qubit Z for readout bias diagnostics",
    )

    # --- Diagnostics: ZZ between first qubits of distinct pairs ---
    cross_paulis = [
        _two_local(2 * a, 2 * b, "Z", "Z")
        for a in range(n_pairs)
        for b in range(a + 1, n_pairs)
    ]
    if cross_paulis:  # empty when n_pairs < 2
        suites["diagnostics_cross_pair"] = ObservableSuite.from_pauli_strings(
            name="diagnostics_cross_pair",
            suite_type=SuiteType.DIAGNOSTIC,
            pauli_strings=cross_paulis,
            description="Cross-pair ZZ correlators for crosstalk detection",
        )

    # --- Stress: large random set, stratified by Pauli weight ---
    suites["stress_random_1000"] = ObservableSuite.from_pauli_strings(
        name="stress_random_1000",
        suite_type=SuiteType.STRESS,
        pauli_strings=sample_random_paulis(n_qubits, 1000, strategy="stratified", seed=seed),
        description="1000 random Paulis, stratified by weight",
    )

    return suites

make_chemistry_suites(n_qubits, hamiltonian_terms=None, hamiltonian_coeffs=None, molecule_name='generic', seed=42)

Create benchmark suites for chemistry / VQE.

Energy estimation: E = Σ_k c_k ⟨P_k⟩

Parameters:

Name Type Description Default
n_qubits int

Number of qubits

required
hamiltonian_terms list[str] | None

Pauli strings for Hamiltonian (optional)

None
hamiltonian_coeffs list[float] | None

Coefficients for each term (optional)

None
molecule_name str

Name for labeling (H2, LiH, etc.)

'generic'
seed int

Random seed

42
Suites

workload_energy: Hamiltonian terms with weights (if provided) stress_random_1000: Random observables

Source code in src/quartumse/observables/suites.py
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
def make_chemistry_suites(
    n_qubits: int,
    hamiltonian_terms: list[str] | None = None,
    hamiltonian_coeffs: list[float] | None = None,
    molecule_name: str = "generic",
    seed: int = 42,
) -> dict[str, ObservableSuite]:
    """Create benchmark suites for chemistry / VQE.

    Energy estimation: E = Σ_k c_k ⟨P_k⟩

    Args:
        n_qubits: Number of qubits
        hamiltonian_terms: Pauli strings for Hamiltonian (optional)
        hamiltonian_coeffs: Coefficients for each term (optional)
        molecule_name: Name for labeling (H2, LiH, etc.)
        seed: Random seed

    Suites:
        workload_energy: Hamiltonian terms with weights (if provided)
        stress_random_1000: Random observables
    """
    suites: dict[str, ObservableSuite] = {}

    # --- Workload: energy estimation ---
    if hamiltonian_terms and hamiltonian_coeffs:
        # Pair each Hamiltonian term with its coefficient as the suite weight.
        term_weights = dict(zip(hamiltonian_terms, hamiltonian_coeffs, strict=False))
        suites["workload_energy"] = ObservableSuite.from_pauli_strings(
            name="workload_energy",
            suite_type=SuiteType.WORKLOAD,
            pauli_strings=hamiltonian_terms,
            weights=term_weights,
            objective=ObjectiveType.WEIGHTED_SUM,
            description=f"{molecule_name} Hamiltonian energy estimation",
        )
    else:
        # No Hamiltonian given: random 2-local proxy for a molecular Hamiltonian.
        proxy_paulis = generate_zz_correlators(n_qubits, graph="all")
        proxy_paulis.extend(generate_single_qubit(n_qubits, paulis="XYZ"))
        suites["workload_energy_placeholder"] = ObservableSuite.from_pauli_strings(
            name="workload_energy_placeholder",
            suite_type=SuiteType.WORKLOAD,
            pauli_strings=proxy_paulis,
            description=f"{molecule_name} placeholder (use actual Hamiltonian when available)",
            metadata={"is_placeholder": True},
        )

    # --- Stress: large random observable set ---
    suites["stress_random_1000"] = ObservableSuite.from_pauli_strings(
        name="stress_random_1000",
        suite_type=SuiteType.STRESS,
        pauli_strings=sample_random_paulis(n_qubits, 1000, strategy="stratified", seed=seed),
        description="1000 random Paulis, stratified by weight",
    )

    return suites

make_commuting_suite(n_qubits, basis='Z', include_global=True, name=None)

Create an all-commuting suite (grouped measurement advantage).

Parameters:

Name Type Description Default
n_qubits int

Number of qubits

required
basis Literal['Z', 'X', 'Y']

Which Pauli basis ('Z', 'X', or 'Y')

'Z'
include_global bool

Include global string (e.g., Z^n)

True
name str | None

Suite name

None
Source code in src/quartumse/observables/suites.py
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
def make_commuting_suite(
    n_qubits: int,
    basis: Literal["Z", "X", "Y"] = "Z",
    include_global: bool = True,
    name: str | None = None,
) -> ObservableSuite:
    """Create an all-commuting suite (grouped measurement advantage).

    Args:
        n_qubits: Number of qubits
        basis: Which Pauli basis ('Z', 'X', or 'Y')
        include_global: Include global string (e.g., Z^n)
        name: Suite name (defaults to "commuting_<basis>_only")

    Returns:
        ObservableSuite of single-qubit, 2-local, and (optionally) global
        observables in the chosen basis; all terms mutually commute.
    """
    # All single-qubit in this basis
    paulis = generate_single_qubit(n_qubits, paulis=basis)

    # All 2-local in this basis
    for i in range(n_qubits):
        for j in range(i + 1, n_qubits):
            pauli_list = ["I"] * n_qubits
            pauli_list[i] = basis
            pauli_list[j] = basis
            paulis.append("".join(pauli_list))

    if include_global:
        paulis.append(basis * n_qubits)

    if name is None:
        name = f"commuting_{basis}_only"

    return ObservableSuite.from_pauli_strings(
        name=name,
        suite_type=SuiteType.COMMUTING,
        # Order-preserving dedup: list(set(...)) reorders nondeterministically
        # under string-hash randomization, breaking run-to-run reproducibility.
        pauli_strings=list(dict.fromkeys(paulis)),
        description=f"All-{basis} observables (fully commuting)",
    )

make_ghz_suites(n_qubits, seed=42)

Create benchmark suites for GHZ state verification.

GHZ state: (|00...0⟩ + |11...1⟩) / √2

Suites

workload_stabilizers: GHZ stabilizer generators (X^n, Z_i Z_j pairs) stress_random_1000: 1000 random observables, stratified by weight commuting_z_only: All-Z correlators (grouped measurement advantage) posthoc_library: 2000 observables for post-hoc querying tests

Source code in src/quartumse/observables/suites.py
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
def make_ghz_suites(n_qubits: int, seed: int = 42) -> dict[str, ObservableSuite]:
    """Create benchmark suites for GHZ state verification.

    GHZ state: (|00...0⟩ + |11...1⟩) / √2

    Suites:
        workload_stabilizers: GHZ stabilizer generators (X^n, Z_i Z_j pairs)
        stress_random_1000: 1000 random observables, stratified by weight
        commuting_z_only: All-Z correlators (grouped measurement advantage)
        posthoc_library: 2000 observables for post-hoc querying tests
    """
    suites = {}

    # === WORKLOAD: GHZ Stabilizers ===
    # Stabilizers: X^⊗n, Z_i Z_{i+1} for all i (and optionally all Z_i Z_j)
    stabilizer_paulis = []
    stabilizer_paulis.append("X" * n_qubits)  # Global X parity
    stabilizer_paulis.extend(generate_zz_correlators(n_qubits, graph="all"))  # All ZZ pairs

    suites["workload_stabilizers"] = ObservableSuite.from_pauli_strings(
        name="workload_stabilizers",
        suite_type=SuiteType.WORKLOAD,
        pauli_strings=stabilizer_paulis,
        description=f"GHZ stabilizer generators: X^{n_qubits} + all Z_i Z_j pairs",
    )

    # === STRESS: Random 1000 observables ===
    stress_paulis = sample_random_paulis(n_qubits, 1000, strategy="stratified", seed=seed)
    suites["stress_random_1000"] = ObservableSuite.from_pauli_strings(
        name="stress_random_1000",
        suite_type=SuiteType.STRESS,
        pauli_strings=stress_paulis,
        description="1000 random Paulis, stratified by weight",
    )

    # === COMMUTING: Z-only correlators ===
    z_paulis = generate_zz_correlators(n_qubits, graph="all")
    z_paulis.extend(generate_single_qubit(n_qubits, paulis="Z"))
    z_paulis.append("Z" * n_qubits)

    suites["commuting_z_only"] = ObservableSuite.from_pauli_strings(
        name="commuting_z_only",
        suite_type=SuiteType.COMMUTING,
        # Order-preserving dedup: list(set(...)) reorders nondeterministically
        # under string-hash randomization, breaking run-to-run reproducibility.
        pauli_strings=list(dict.fromkeys(z_paulis)),
        description="All-Z observables (fully commuting, grouped measurement advantage)",
    )

    # === POSTHOC: Large library for querying tests ===
    # Offset seed so the post-hoc library is independent of the stress set.
    posthoc_paulis = sample_random_paulis(n_qubits, 2000, strategy="stratified", seed=seed + 1000)
    suites["posthoc_library"] = ObservableSuite.from_pauli_strings(
        name="posthoc_library",
        suite_type=SuiteType.POSTHOC,
        pauli_strings=posthoc_paulis,
        description="2000 observables for post-hoc querying benchmark",
    )

    return suites

make_ising_suites(n_qubits, seed=42)

Create benchmark suites for Ising/Trotter physics.

Transverse-field Ising: H = -J Σ Z_i Z_{i+1} - h Σ X_i

Suites

workload_energy: Hamiltonian terms (ZZ chain + X single-qubit) workload_correlations: Z_i Z_j at multiple distances stress_random_1000: Random observables

Source code in src/quartumse/observables/suites.py
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
def make_ising_suites(n_qubits: int, seed: int = 42) -> dict[str, ObservableSuite]:
    """Create benchmark suites for Ising/Trotter physics.

    Transverse-field Ising: H = -J Σ Z_i Z_{i+1} - h Σ X_i

    Suites:
        workload_energy: Hamiltonian terms (ZZ chain + X single-qubit)
        workload_correlations: Z_i Z_j at multiple distances
        stress_random_1000: Random observables
    """
    suites = {}

    # === WORKLOAD: Energy (Hamiltonian terms) ===
    energy_paulis = []
    energy_weights = {}

    # ZZ chain terms (J = 1.0)
    zz_chain = generate_zz_correlators(n_qubits, graph="chain")
    for ps in zz_chain:
        energy_paulis.append(ps)
        energy_weights[ps] = -1.0  # -J

    # X single-qubit terms (h = 0.5)
    x_single = generate_single_qubit(n_qubits, paulis="X")
    for ps in x_single:
        energy_paulis.append(ps)
        energy_weights[ps] = -0.5  # -h

    suites["workload_energy"] = ObservableSuite.from_pauli_strings(
        name="workload_energy",
        suite_type=SuiteType.WORKLOAD,
        pauli_strings=energy_paulis,
        weights=energy_weights,
        objective=ObjectiveType.WEIGHTED_SUM,
        description="Ising Hamiltonian: -J Σ Z_i Z_{i+1} - h Σ X_i",
    )

    # === WORKLOAD: Correlation functions ===
    corr_paulis = []
    for r in range(1, min(n_qubits, 5)):  # Distances 1, 2, 3, 4
        for i in range(n_qubits - r):
            pauli_list = ["I"] * n_qubits
            pauli_list[i] = "Z"
            pauli_list[i + r] = "Z"
            corr_paulis.append("".join(pauli_list))

    suites["workload_correlations"] = ObservableSuite.from_pauli_strings(
        name="workload_correlations",
        suite_type=SuiteType.WORKLOAD,
        # Order-preserving dedup: list(set(...)) reorders nondeterministically
        # under string-hash randomization, breaking run-to-run reproducibility.
        pauli_strings=list(dict.fromkeys(corr_paulis)),
        description="Z_i Z_j correlators at distances r=1,2,3,4",
    )

    # === STRESS: Random 1000 ===
    stress_paulis = sample_random_paulis(n_qubits, 1000, strategy="stratified", seed=seed)
    suites["stress_random_1000"] = ObservableSuite.from_pauli_strings(
        name="stress_random_1000",
        suite_type=SuiteType.STRESS,
        pauli_strings=stress_paulis,
        description="1000 random Paulis, stratified by weight",
    )

    return suites

make_phase_sensing_suites(n_qubits, seed=42)

Create benchmark suites for GHZ phase sensing / metrology.

GHZ state with phase: (|00...0⟩ + e^{inφ}|11...1⟩) / √2

Key observables for phase estimation
  • X^⊗n: Real part of off-diagonal coherence
  • Y^⊗n: Imaginary part of off-diagonal coherence
Suites

workload_phase_signal: X^n and Y^n (ALWAYS included for n >= 2) workload_stabilizers: Full GHZ stabilizer set stress_random_500: Random observables

Source code in src/quartumse/observables/suites.py
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
def make_phase_sensing_suites(n_qubits: int, seed: int = 42) -> dict[str, ObservableSuite]:
    """Create benchmark suites for GHZ phase sensing / metrology.

    GHZ state with phase: (|00...0⟩ + e^{inφ}|11...1⟩) / √2

    Key observables for phase estimation:
        - X^⊗n: Real part of off-diagonal coherence
        - Y^⊗n: Imaginary part of off-diagonal coherence

    Suites:
        workload_phase_signal: X^n and Y^n (ALWAYS included for n >= 2)
        workload_stabilizers: Full GHZ stabilizer set
        stress_random_500: Random observables
    """
    suites = {}

    # === WORKLOAD: Phase signal observables ===
    # CRITICAL: Always include Y^n for all n >= 2
    phase_paulis = ["X" * n_qubits, "Y" * n_qubits]

    suites["workload_phase_signal"] = ObservableSuite.from_pauli_strings(
        name="workload_phase_signal",
        suite_type=SuiteType.WORKLOAD,
        pauli_strings=phase_paulis,
        description=f"Phase sensing: X^{n_qubits} and Y^{n_qubits} (always included)",
        metadata={"includes_Y_global": True},
    )

    # === WORKLOAD: Full stabilizers (for fidelity estimation) ===
    stabilizer_paulis = list(phase_paulis)  # Copy
    stabilizer_paulis.extend(generate_zz_correlators(n_qubits, graph="chain"))

    suites["workload_stabilizers"] = ObservableSuite.from_pauli_strings(
        name="workload_stabilizers",
        suite_type=SuiteType.WORKLOAD,
        # Order-preserving dedup: list(set(...)) reorders nondeterministically
        # under string-hash randomization, breaking run-to-run reproducibility.
        pauli_strings=list(dict.fromkeys(stabilizer_paulis)),
        description="GHZ stabilizers for fidelity estimation",
    )

    # === STRESS: Random 500 ===
    stress_paulis = sample_random_paulis(n_qubits, 500, strategy="stratified", seed=seed)
    suites["stress_random_500"] = ObservableSuite.from_pauli_strings(
        name="stress_random_500",
        suite_type=SuiteType.STRESS,
        pauli_strings=stress_paulis,
        description="500 random Paulis for scaling test",
    )

    return suites

make_posthoc_library(n_qubits, n_observables=2000, seed=42, name=None)

Create a post-hoc querying library.

Parameters:

Name Type Description Default
n_qubits int

Number of qubits

required
n_observables int

Library size

2000
seed int

Random seed

42
name str | None

Suite name

None
Source code in src/quartumse/observables/suites.py
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
def make_posthoc_library(
    n_qubits: int,
    n_observables: int = 2000,
    seed: int = 42,
    name: str | None = None,
) -> ObservableSuite:
    """Build a random observable library for post-hoc querying tests.

    Args:
        n_qubits: Number of qubits
        n_observables: Library size
        seed: Random seed
        name: Suite name (auto-generated if None)
    """
    suite_name = name if name is not None else f"posthoc_library_{n_observables}"
    sampled = sample_random_paulis(n_qubits, n_observables, strategy="stratified", seed=seed)
    return ObservableSuite.from_pauli_strings(
        name=suite_name,
        suite_type=SuiteType.POSTHOC,
        pauli_strings=sampled,
        description=f"Library of {n_observables} observables for post-hoc querying tests",
    )

make_qaoa_ring_suites(n_qubits, seed=42)

Create benchmark suites for QAOA MAX-CUT on ring graph.

Ring graph: edges (i, i+1) for i=0..n-2, PLUS wrap-around (n-1, 0)

Cost function: C = Σ_e (1 - ⟨Z_i Z_j⟩) / 2

Suites

workload_cost: All edge ZZ terms (INCLUDING wrap-around!) commuting_cost: Same terms as workload_cost (all commute in Z basis) stress_random_1000: Random observables posthoc_library: 2000 observables for post-hoc querying

Source code in src/quartumse/observables/suites.py
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
def make_qaoa_ring_suites(n_qubits: int, seed: int = 42) -> dict[str, ObservableSuite]:
    """Create benchmark suites for QAOA MAX-CUT on ring graph.

    Ring graph: edges (i, i+1) for i=0..n-2, PLUS wrap-around (n-1, 0)

    Cost function: C = Σ_e (1 - ⟨Z_i Z_j⟩) / 2

    Suites:
        workload_cost: All edge ZZ terms (INCLUDING wrap-around!)
        commuting_cost: Same terms as workload_cost (all commute in Z basis)
        stress_random_1000: Random observables
        posthoc_library: 2000 observables for post-hoc querying
    """
    suites = {}

    # === WORKLOAD: Cost Hamiltonian ===
    # CRITICAL: Include wrap-around edge (n-1, 0)!
    edge_paulis = generate_zz_correlators(n_qubits, graph="ring")
    edge_weights = dict.fromkeys(edge_paulis, 0.5)  # (1 - ⟨ZZ⟩)/2, so weight is -0.5 on ⟨ZZ⟩

    suites["workload_cost"] = ObservableSuite.from_pauli_strings(
        name="workload_cost",
        suite_type=SuiteType.WORKLOAD,
        pauli_strings=edge_paulis,
        weights=edge_weights,
        objective=ObjectiveType.WEIGHTED_SUM,
        description=f"QAOA ring cost: {n_qubits} edges INCLUDING wrap (n-1,0)",
        metadata={"graph": "ring", "includes_wrap": True},
    )

    # === COMMUTING: Same terms (all ZZ commute) ===
    # This shows where grouped direct measurement dominates
    suites["commuting_cost"] = ObservableSuite.from_pauli_strings(
        name="commuting_cost",
        suite_type=SuiteType.COMMUTING,
        pauli_strings=edge_paulis,
        weights=edge_weights,
        objective=ObjectiveType.WEIGHTED_SUM,
        description="Same as workload_cost (all ZZ commute → grouped wins)",
    )

    # === STRESS: Random mixed to show where shadows helps ===
    stress_paulis = sample_random_paulis(n_qubits, 1000, strategy="stratified", seed=seed)
    suites["stress_random_1000"] = ObservableSuite.from_pauli_strings(
        name="stress_random_1000",
        suite_type=SuiteType.STRESS,
        pauli_strings=stress_paulis,
        description="1000 random Paulis (non-commuting → shadows may help)",
    )

    # === POSTHOC: Library for "measure once, query later" ===
    # Seed offset keeps the post-hoc library independent of the stress set.
    posthoc_paulis = sample_random_paulis(n_qubits, 2000, strategy="stratified", seed=seed + 1000)
    suites["posthoc_library"] = ObservableSuite.from_pauli_strings(
        name="posthoc_library",
        suite_type=SuiteType.POSTHOC,
        pauli_strings=posthoc_paulis,
        description="2000 observables for post-hoc querying benchmark",
    )

    return suites

make_stress_suite(n_qubits, n_observables=1000, strategy='stratified', seed=42, name=None)

Create a stress test suite with many observables.

Parameters:

Name Type Description Default
n_qubits int

Number of qubits

required
n_observables int

Number of observables to sample

1000
strategy str

Sampling strategy ('stratified', 'uniform', 'importance')

'stratified'
seed int

Random seed

42
name str | None

Suite name (auto-generated if None)

None
Source code in src/quartumse/observables/suites.py
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
def make_stress_suite(
    n_qubits: int,
    n_observables: int = 1000,
    strategy: str = "stratified",
    seed: int = 42,
    name: str | None = None,
) -> ObservableSuite:
    """Create a stress test suite with many observables.

    Args:
        n_qubits: Number of qubits
        n_observables: Number of observables to sample
        strategy: Sampling strategy ('stratified', 'uniform', 'importance')
        seed: Random seed
        name: Suite name (auto-generated if None)
    """
    suite_name = name if name is not None else f"stress_{strategy}_{n_observables}"
    sampled = sample_random_paulis(n_qubits, n_observables, strategy=strategy, seed=seed)
    return ObservableSuite.from_pauli_strings(
        name=suite_name,
        suite_type=SuiteType.STRESS,
        pauli_strings=sampled,
        description=f"{n_observables} observables, {strategy} sampling",
    )

multi_pilot_analysis(long_form_results, target_n=None, pilot_fractions=None, metric='mean_se')

Analyze pilot-based selection across multiple pilot fractions.

Parameters:

Name Type Description Default
long_form_results list[LongFormRow]

Long-form benchmark results

required
target_n int | None

Final shot budget (None = use max)

None
pilot_fractions list[float] | None

List of pilot fractions to test

None
metric str

Quality metric ("mean_se", "median_se", "max_se")

'mean_se'

Returns:

Type Description
MultiPilotAnalysis

MultiPilotAnalysis with results for each fraction

Source code in src/quartumse/analysis/pilot_analysis.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
def multi_pilot_analysis(
    long_form_results: list[LongFormRow],
    target_n: int | None = None,
    pilot_fractions: list[float] | None = None,
    metric: str = "mean_se",
) -> MultiPilotAnalysis:
    """Analyze pilot-based selection across multiple pilot fractions.

    For each pilot fraction, simulate choosing the "best" protocol from the
    pilot-budget results, compare against the oracle choice (the protocol
    that is best at the full target budget), and record per-replicate
    selection accuracy and regret.

    Args:
        long_form_results: Long-form benchmark results
        target_n: Final shot budget (None = use max observed N)
        pilot_fractions: List of pilot fractions to test
            (default: [0.02, 0.05, 0.10, 0.20])
        metric: Quality metric ("mean_se", "median_se", "max_se")

    Returns:
        MultiPilotAnalysis with results for each fraction

    Raises:
        ValueError: If long_form_results is empty.
    """
    # Explicit guard: an empty input would otherwise surface as a bare
    # IndexError at all_ns[-1]; mirrors run_comprehensive_analysis.
    if not long_form_results:
        raise ValueError("No results provided")

    if pilot_fractions is None:
        pilot_fractions = [0.02, 0.05, 0.10, 0.20]

    # Determine target N (largest observed shot budget by default)
    all_ns = sorted({r.N_total for r in long_form_results})
    if target_n is None:
        target_n = all_ns[-1]

    # Group by protocol
    by_protocol: dict[str, list[LongFormRow]] = defaultdict(list)
    for row in long_form_results:
        by_protocol[row.protocol_id].append(row)

    protocols = list(by_protocol.keys())

    # Get available N values; pilot budgets are snapped to this grid
    available_ns_set: set[int] = set()
    for rows in by_protocol.values():
        available_ns_set.update(r.N_total for r in rows)
    available_ns = sorted(available_ns_set)

    results = {}
    # Initialized up front so the summary below is well-defined even when
    # every fraction is skipped (replaces the fragile `"replicate_ids" in
    # dir()` introspection the summary previously relied on).
    replicate_ids: set = set()

    for frac in pilot_fractions:
        # Find closest available N to pilot fraction
        ideal_pilot_n = int(frac * target_n)
        pilot_n = min(available_ns, key=lambda x: abs(x - ideal_pilot_n))

        if pilot_n >= target_n:
            # Skip if pilot >= target
            continue

        # Collect replicates that have data at the pilot or target budget
        replicate_ids = set()
        for rows in by_protocol.values():
            for row in rows:
                if row.N_total in [pilot_n, target_n]:
                    replicate_ids.add(row.replicate_id)

        selection_results = []
        selections: Counter[str] = Counter()

        for rep_id in sorted(replicate_ids):
            # Get pilot and target quality for each protocol
            pilot_quality = {}
            target_quality = {}

            for protocol_id, rows in by_protocol.items():
                pilot_rows = [r for r in rows if r.N_total == pilot_n and r.replicate_id == rep_id]
                target_rows = [
                    r for r in rows if r.N_total == target_n and r.replicate_id == rep_id
                ]

                pilot_quality[protocol_id] = _compute_quality(pilot_rows, metric)
                target_quality[protocol_id] = _compute_quality(target_rows, metric)

            if not pilot_quality or not target_quality:
                continue

            # Select best protocol based on pilot
            selected = min(pilot_quality, key=pilot_quality.get)
            # Oracle: best at target
            oracle = min(target_quality, key=target_quality.get)

            selections[selected] += 1

            # Regret: extra cost paid at the target budget for trusting the pilot
            regret = target_quality[selected] - target_quality[oracle]

            selection_results.append(
                {
                    "replicate_id": rep_id,
                    "selected": selected,
                    "oracle": oracle,
                    "correct": selected == oracle,
                    "regret": regret,
                    "pilot_quality": pilot_quality,
                    "target_quality": target_quality,
                }
            )

        if not selection_results:
            continue

        accuracy = float(np.mean([r["correct"] for r in selection_results]))
        regrets = [r["regret"] for r in selection_results]

        results[frac] = PilotFractionResult(
            pilot_fraction=frac,
            pilot_n=pilot_n,
            target_n=target_n,
            selection_accuracy=accuracy,
            mean_regret=float(np.mean(regrets)),
            max_regret=float(np.max(regrets)),
            protocol_selections=dict(selections),
            per_replicate=selection_results,
        )

    # Detect degeneracy: all fractions snapped to the same grid point
    pilot_n_values = [r.pilot_n for r in results.values()]
    unique_ns = len(set(pilot_n_values)) if pilot_n_values else 0
    is_degenerate = unique_ns < len(pilot_fractions) and unique_ns <= 1

    # Find optimal fraction (highest accuracy with reasonable cost)
    if results and not is_degenerate:
        # Simple heuristic: highest accuracy
        optimal = max(results.keys(), key=lambda f: results[f].selection_accuracy)
    else:
        optimal = None

    # Summary; n_replicates reflects the last fraction evaluated (0 if none)
    summary = {
        "n_protocols": len(protocols),
        "protocols": protocols,
        "n_replicates": len(replicate_ids),
        "accuracy_by_fraction": {f: r.selection_accuracy for f, r in results.items()},
        "regret_by_fraction": {f: r.mean_regret for f, r in results.items()},
    }
    if is_degenerate:
        summary["degenerate_warning"] = (
            f"All {len(pilot_fractions)} pilot fractions snapped to pilot_n="
            f"{pilot_n_values[0] if pilot_n_values else '?'}. "
            f"Results are meaningless — use interpolated_pilot_analysis() instead."
        )

    return MultiPilotAnalysis(
        target_n=target_n,
        fractions=pilot_fractions,
        results=results,
        optimal_fraction=optimal,
        summary=summary,
        degenerate=is_degenerate,
        unique_pilot_ns=unique_ns,
    )

normal_ci(estimate, se, confidence_level=0.95, n_samples=0)

Construct normal (Wald) confidence interval.

CI = estimate ± z_{α/2} * SE

where z_{α/2} is the (1 - α/2) quantile of the standard normal.

Parameters:

Name Type Description Default
estimate float

Point estimate.

required
se float

Standard error.

required
confidence_level float

Confidence level (default 0.95).

0.95
n_samples int

Number of samples (for metadata).

0

Returns:

Type Description
ConfidenceInterval

ConfidenceInterval with normal CI.

Source code in src/quartumse/stats/confidence.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def normal_ci(
    estimate: float,
    se: float,
    confidence_level: float = 0.95,
    n_samples: int = 0,
) -> ConfidenceInterval:
    """Construct a normal (Wald) confidence interval.

    CI = estimate ± z_{α/2} * SE

    where z_{α/2} is the (1 - α/2) quantile of the standard normal.

    Args:
        estimate: Point estimate.
        se: Standard error.
        confidence_level: Confidence level (default 0.95).
        n_samples: Number of samples (for metadata).

    Returns:
        ConfidenceInterval with normal CI.
    """
    from scipy import stats

    # Critical value: (1 - α/2) quantile of the standard normal
    tail_prob = (1 - confidence_level) / 2
    z_crit = stats.norm.ppf(1 - tail_prob)

    half_width = z_crit * se
    lower_raw = estimate - half_width
    upper_raw = estimate + half_width

    # Raw bounds are kept alongside physically-clamped ones
    return ConfidenceInterval(
        estimate=estimate,
        se=se,
        ci_low_raw=lower_raw,
        ci_high_raw=upper_raw,
        ci_low=clamp_to_physical_bounds(lower_raw),
        ci_high=clamp_to_physical_bounds(upper_raw),
        confidence_level=confidence_level,
        method=CIMethodType.NORMAL,
        n_samples=n_samples,
    )

per_observable_crossover(results_a, results_b, metric='se', interpolate=True)

Compute per-observable crossover analysis.

For each observable, finds at what N protocol A beats protocol B.

Parameters:

Name Type Description Default
results_a list[LongFormRow]

Long-form results from protocol A

required
results_b list[LongFormRow]

Long-form results from protocol B

required
metric str

"se" or "error"

'se'
interpolate bool

Whether to interpolate crossover using power-law

True

Returns:

Type Description
CrossoverAnalysis

CrossoverAnalysis with per-observable results

Source code in src/quartumse/analysis/crossover.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
def per_observable_crossover(
    results_a: list[LongFormRow],
    results_b: list[LongFormRow],
    metric: str = "se",
    interpolate: bool = True,
) -> CrossoverAnalysis:
    """Compute per-observable crossover analysis.

    For each observable, finds at what N protocol A beats protocol B.

    Args:
        results_a: Long-form results from protocol A
        results_b: Long-form results from protocol B
        metric: "se" or "error"
        interpolate: Whether to interpolate crossover using power-law
    Returns:
        CrossoverAnalysis with per-observable results
    """
    protocol_a = results_a[0].protocol_id if results_a else "unknown"
    protocol_b = results_b[0].protocol_id if results_b else "unknown"

    # Group rows into {observable_id: {N_total: [values]}}
    def group_by_obs_n(rows: list[LongFormRow]) -> dict[str, dict[int, list[float]]]:
        result: dict[str, dict[int, list[float]]] = {}
        for row in rows:
            # NOTE: both metrics currently record SE — "error" would require
            # ground-truth values, which long-form rows do not carry, so SE
            # is the fallback either way.
            result.setdefault(row.observable_id, {}).setdefault(row.N_total, []).append(row.se)
        return result

    grouped_a = group_by_obs_n(results_a)
    grouped_b = group_by_obs_n(results_b)

    # Get Pauli strings from results (when rows carry them)
    pauli_strings = {}
    for row in results_a + results_b:
        if hasattr(row, "pauli_string") and row.pauli_string:
            pauli_strings[row.observable_id] = row.pauli_string

    # Only observables measured by both protocols can cross over
    common_obs = set(grouped_a.keys()) & set(grouped_b.keys())

    per_obs_results = []

    for obs_id in sorted(common_obs):
        data_a = grouped_a[obs_id]
        data_b = grouped_b[obs_id]
        common_ns = sorted(set(data_a.keys()) & set(data_b.keys()))

        if not common_ns:
            continue

        # Compute mean metric at each N
        mean_a = {n: np.mean(data_a[n]) for n in common_ns}
        mean_b = {n: np.mean(data_b[n]) for n in common_ns}

        # Compute SE ratio (A / B; < 1 means A is better at that N)
        se_ratio = {n: mean_a[n] / mean_b[n] if mean_b[n] > 0 else float("inf") for n in common_ns}

        # Determine which protocol wins at each N
        a_better_at = [n for n in common_ns if mean_a[n] < mean_b[n]]
        b_better_at = [n for n in common_ns if mean_b[n] < mean_a[n]]

        a_always_better = len(b_better_at) == 0 and len(a_better_at) > 0
        b_always_better = len(a_better_at) == 0 and len(b_better_at) > 0

        crossover_n = None

        # Find crossover point: first adjacent pair where A goes from
        # worse-or-equal to strictly better
        if a_better_at and b_better_at:
            # There's a crossover somewhere
            for i in range(len(common_ns) - 1):
                n1, n2 = common_ns[i], common_ns[i + 1]
                # A was worse, now better
                if mean_a[n1] >= mean_b[n1] and mean_a[n2] < mean_b[n2]:
                    if interpolate:
                        # Linear interpolation of the ratio to locate ratio == 1
                        ratio1 = mean_a[n1] / mean_b[n1] if mean_b[n1] > 0 else float("inf")
                        ratio2 = mean_a[n2] / mean_b[n2] if mean_b[n2] > 0 else float("inf")
                        denom = ratio2 - ratio1
                        # Check for valid interpolation (finite denominator, not too small)
                        if np.isfinite(ratio1) and np.isfinite(ratio2) and abs(denom) > 1e-12:
                            frac = (1 - ratio1) / denom
                            crossover_n = n1 + frac * (n2 - n1)
                        else:
                            # Fallback to midpoint if interpolation not possible
                            crossover_n = (n1 + n2) / 2
                    else:
                        crossover_n = n2
                    break

        # Get Pauli string and locality (Pauli weight = # non-identity factors)
        pauli = pauli_strings.get(obs_id, None)
        locality = sum(1 for c in pauli if c != "I") if pauli else 0

        # Fit power laws if interpolating (needs >= 3 points)
        fit_a = None
        fit_b = None
        if interpolate and len(common_ns) >= 3:
            fit_a = fit_power_law(common_ns, [mean_a[n] for n in common_ns])
            fit_b = fit_power_law(common_ns, [mean_b[n] for n in common_ns])

        per_obs_results.append(
            ObservableCrossover(
                observable_id=obs_id,
                pauli_string=pauli,
                locality=locality,
                crossover_n=crossover_n,
                a_always_better=a_always_better,
                b_always_better=b_always_better,
                se_ratio_by_n=se_ratio,
                fit_a=fit_a,
                fit_b=fit_b,
            )
        )

    return CrossoverAnalysis(
        protocol_a=protocol_a,
        protocol_b=protocol_b,
        metric=metric,
        per_observable=per_obs_results,
    )

plot_attainment_curves(attainment_data, epsilon, config=None, ax=None)

Plot attainment curves for multiple protocols.

f(N;ε) = fraction of observables with SE ≤ ε

Parameters:

Name Type Description Default
attainment_data dict[str, dict[int, float]]

Dict mapping protocol_id to {N: attainment}.

required
epsilon float

Target precision used.

required
config PlotConfig | None

Plot configuration.

None
ax Axes | None

Existing axes to plot on.

None

Returns:

Type Description
Figure

Matplotlib Figure.

Source code in src/quartumse/viz/plots.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def plot_attainment_curves(
    attainment_data: dict[str, dict[int, float]],
    epsilon: float,
    config: PlotConfig | None = None,
    ax: Axes | None = None,
) -> Figure:
    """Plot attainment curves for multiple protocols.

    f(N;ε) = fraction of observables with SE ≤ ε

    Args:
        attainment_data: Dict mapping protocol_id to {N: attainment}.
        epsilon: Target precision used.
        config: Plot configuration.
        ax: Existing axes to plot on.

    Returns:
        Matplotlib Figure.
    """
    _check_matplotlib()
    config = config or PlotConfig()

    # Reuse caller-provided axes if given; otherwise create a fresh figure
    if ax is None:
        fig, ax = plt.subplots(figsize=config.figsize, dpi=config.dpi)
    else:
        fig = ax.get_figure()

    # One line per protocol, colors cycling through the configured palette
    for idx, (proto, curve) in enumerate(attainment_data.items()):
        budgets = sorted(curve)
        fractions = [curve[n] for n in budgets]
        ax.plot(
            budgets,
            fractions,
            "o-",
            label=proto,
            color=config.palette[idx % len(config.palette)],
            linewidth=2,
        )

    ax.set_xlabel("Shot Budget (N)", fontsize=12)
    ax.set_ylabel(f"Attainment f(N; ε={epsilon})", fontsize=12)
    ax.set_title("Attainment Curves", fontsize=14)
    ax.set_xscale("log")
    ax.set_ylim(0, 1.05)
    # Reference line at full attainment
    ax.axhline(y=1.0, color="gray", linestyle="--", alpha=0.5)
    ax.legend(loc="lower right")
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig

plot_ssf_comparison(ssf_data, baseline_id, config=None, ax=None)

Plot shot-savings factor bar chart.

Parameters:

Name Type Description Default
ssf_data dict[str, float]

Dict mapping protocol_id to SSF value.

required
baseline_id str

Baseline protocol ID (SSF=1).

required
config PlotConfig | None

Plot configuration.

None
ax Axes | None

Existing axes to plot on.

None

Returns:

Type Description
Figure

Matplotlib Figure.

Source code in src/quartumse/viz/plots.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
def plot_ssf_comparison(
    ssf_data: dict[str, float],
    baseline_id: str,
    config: PlotConfig | None = None,
    ax: Axes | None = None,
) -> Figure:
    """Plot shot-savings factor bar chart.

    Bars are colored by whether the protocol is at or above the baseline
    (SSF >= 1) or below it; the baseline itself appears as a dashed line.

    Args:
        ssf_data: Dict mapping protocol_id to SSF value.
        baseline_id: Baseline protocol ID (SSF=1).
        config: Plot configuration.
        ax: Existing axes to plot on.

    Returns:
        Matplotlib Figure.
    """
    _check_matplotlib()
    config = config or PlotConfig()

    if ax is None:
        fig, ax = plt.subplots(figsize=config.figsize, dpi=config.dpi)
    else:
        fig = ax.get_figure()

    protocols = list(ssf_data.keys())
    ssfs = list(ssf_data.values())
    colors = [config.palette[0] if ssf >= 1 else config.palette[3] for ssf in ssfs]

    bars = ax.bar(protocols, ssfs, color=colors, edgecolor="black")
    ax.axhline(y=1.0, color="gray", linestyle="--", linewidth=2, label=f"Baseline ({baseline_id})")

    ax.set_xlabel("Protocol", fontsize=12)
    ax.set_ylabel("Shot-Savings Factor (SSF)", fontsize=12)
    ax.set_title(f"Shot-Savings Factor vs {baseline_id}", fontsize=14)
    # `default=` guards against an empty ssf_data dict, where a bare
    # max() would raise ValueError before the (empty) figure is returned.
    ax.set_ylim(0, max(ssfs, default=1.0) * 1.2)

    # Add value labels on bars
    for bar, ssf in zip(bars, ssfs, strict=False):
        height = bar.get_height()
        ax.annotate(
            f"{ssf:.2f}×",
            xy=(bar.get_x() + bar.get_width() / 2, height),
            xytext=(0, 3),
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontsize=10,
        )

    ax.legend()
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    return fig

run_benchmark_suite(circuit, observable_set, circuit_id='circuit', config=None, protocols=None, locality_map=None)

Run unified benchmark suite.

This is the main entry point for running benchmarks. It provides three modes:

  • basic: Run core protocols + Tasks 1, 3, 6 + basic report
  • complete: Run all 8 tasks + complete report
  • analysis: Complete + enhanced analysis (crossover, locality, bootstrap)

All results are saved to a unique timestamped directory.

Parameters:

Name Type Description Default
circuit Any

Quantum circuit (any Qiskit QuantumCircuit)

required
observable_set ObservableSet

Set of observables to estimate

required
circuit_id str

Identifier for the circuit

'circuit'
config BenchmarkSuiteConfig | None

Benchmark configuration (default: BenchmarkSuiteConfig())

None
protocols list | None

List of protocol IDs or instances (default: all baselines + shadows)

None
locality_map dict[str, int] | None

Optional mapping of observable_id -> locality (Pauli weight)

None

Returns:

Type Description
BenchmarkSuiteResult

BenchmarkSuiteResult with all outputs and paths to saved reports

Example

from quartumse import run_benchmark_suite, BenchmarkMode, BenchmarkSuiteConfig

Basic benchmark

result = run_benchmark_suite(circuit, observables, circuit_id="ghz_4q")

Complete with all 8 tasks

config = BenchmarkSuiteConfig(mode=BenchmarkMode.COMPLETE)
result = run_benchmark_suite(circuit, observables, config=config)

Full analysis

config = BenchmarkSuiteConfig(mode=BenchmarkMode.ANALYSIS)
result = run_benchmark_suite(circuit, observables, config=config)

print(f"Reports saved to: {result.output_dir}")

Source code in src/quartumse/benchmark_suite.py
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
def run_benchmark_suite(
    circuit: Any,
    observable_set: ObservableSet,
    circuit_id: str = "circuit",
    config: BenchmarkSuiteConfig | None = None,
    protocols: list | None = None,
    locality_map: dict[str, int] | None = None,
) -> BenchmarkSuiteResult:
    """Run unified benchmark suite.

    This is the main entry point for running benchmarks. It provides three modes:

    - **basic**: Run core protocols + Tasks 1, 3, 6 + basic report
    - **complete**: Run all 8 tasks + complete report
    - **analysis**: Complete + enhanced analysis (crossover, locality, bootstrap)

    All results are saved to a unique timestamped directory.

    Args:
        circuit: Quantum circuit (any Qiskit QuantumCircuit)
        observable_set: Set of observables to estimate
        circuit_id: Identifier for the circuit
        config: Benchmark configuration (default: BenchmarkSuiteConfig())
        protocols: List of protocol IDs or instances (default: all baselines + shadows)
        locality_map: Optional mapping of observable_id -> locality (Pauli weight)

    Returns:
        BenchmarkSuiteResult with all outputs and paths to saved reports

    Example:
        >>> from quartumse import run_benchmark_suite, BenchmarkMode, BenchmarkSuiteConfig
        >>>
        >>> # Basic benchmark
        >>> result = run_benchmark_suite(circuit, observables, circuit_id="ghz_4q")
        >>>
        >>> # Complete with all 8 tasks
        >>> config = BenchmarkSuiteConfig(mode=BenchmarkMode.COMPLETE)
        >>> result = run_benchmark_suite(circuit, observables, config=config)
        >>>
        >>> # Full analysis
        >>> config = BenchmarkSuiteConfig(mode=BenchmarkMode.ANALYSIS)
        >>> result = run_benchmark_suite(circuit, observables, config=config)
        >>>
        >>> print(f"Reports saved to: {result.output_dir}")
    """
    # Imported lazily to avoid a circular import at module load time.
    from .benchmarking import run_publication_benchmark

    if config is None:
        config = BenchmarkSuiteConfig()

    # Generate unique run ID and output directory
    run_id = _generate_run_id(circuit_id)
    timestamp = datetime.now()
    output_dir = _create_output_dir(config.output_base_dir, run_id)

    print("=" * 70)
    print(f"BENCHMARK SUITE: {config.mode.value.upper()}")
    print("=" * 70)
    print(f"Run ID: {run_id}")
    print(f"Output: {output_dir}")
    print(f"Mode: {config.mode.value}")
    print()

    # Step 1: Run base benchmark (always runs, regardless of mode)
    noise_info = f" (noise: {config.noise_profile})" if config.noise_profile else ""
    print(f"Step 1: Running base benchmark{noise_info}...")
    base_results = run_publication_benchmark(
        circuit=circuit,
        observable_set=observable_set,
        protocols=protocols,
        n_shots_grid=config.n_shots_grid,
        n_replicates=config.n_replicates,
        seed=config.seed,
        compute_truth=config.compute_truth,
        circuit_id=circuit_id,
        output_dir=str(output_dir / "base"),
        epsilon=config.epsilon,
        delta=config.delta,
        noise_profile=config.noise_profile,
        timeout_per_protocol_s=config.timeout_per_protocol_s,
        hw_timing_profile=config.hw_timing_profile,
    )

    # Unpack the base benchmark's outputs; ground truth may be None
    # (e.g. when it was not computed), so truth_values can be None too.
    long_form_rows = base_results["long_form_results"]
    truth_values = (
        base_results["ground_truth"].truth_values if base_results["ground_truth"] else None
    )
    task_results = base_results["task_results"]
    summary = base_results["summary"]
    protocol_ids = summary.get("protocols", [])

    print(f"  Completed: {len(long_form_rows)} rows")
    print()

    # Initialize result containers (populated only in the richer modes)
    all_task_results = None
    analysis = None
    reports = {}

    # Step 2: Run additional tasks for complete/analysis modes
    if config.mode in [BenchmarkMode.COMPLETE, BenchmarkMode.ANALYSIS]:
        print("Step 2: Running all 8 tasks...")
        all_task_results = _run_all_tasks(
            long_form_rows=long_form_rows,
            truth_values=truth_values,
            n_shots_grid=config.n_shots_grid,
            n_replicates=config.n_replicates,
            epsilon=config.epsilon,
            delta=config.delta,
            protocol_ids=protocol_ids,
        )
        print(f"  Completed: {len(all_task_results)} task evaluations")
        print()

    # Step 3: Run comprehensive analysis for analysis mode
    if config.mode == BenchmarkMode.ANALYSIS:
        print("Step 3: Running comprehensive analysis...")
        from .analysis import run_comprehensive_analysis

        analysis = run_comprehensive_analysis(
            long_form_results=long_form_rows,
            truth_values=truth_values,
            epsilon=config.epsilon,
            delta=config.delta,
            locality_map=locality_map,
            run_id=run_id,
            shadows_protocol_id=config.shadows_protocol_id,
            baseline_protocol_id=config.baseline_protocol_id,
        )
        print("  Comprehensive analysis complete")
        print()

    # Step 4: Generate reports
    print("Step 4: Generating reports...")

    # Always generate basic report
    basic_report_path = _generate_basic_report(
        run_id=run_id,
        summary=summary,
        task_results=task_results,
        output_dir=output_dir,
    )
    reports["basic"] = basic_report_path
    print(f"  Basic report: {basic_report_path}")

    # Generate complete report for complete/analysis modes
    if config.mode in [BenchmarkMode.COMPLETE, BenchmarkMode.ANALYSIS]:
        complete_report_path = _generate_complete_report(
            run_id=run_id,
            summary=summary,
            all_task_results=all_task_results or {},
            long_form_rows=long_form_rows,
            truth_values=truth_values,
            config=config,
            output_dir=output_dir,
        )
        reports["complete"] = complete_report_path
        print(f"  Complete report: {complete_report_path}")

    # Generate analysis report for analysis mode
    if config.mode == BenchmarkMode.ANALYSIS and analysis:
        analysis_report_content = analysis.generate_report()
        analysis_report_path = output_dir / "analysis_report.md"
        analysis_report_path.write_text(analysis_report_content, encoding="utf-8")
        reports["analysis"] = analysis_report_path
        print(f"  Analysis report: {analysis_report_path}")

        # Also save JSON (machine-readable companion to the markdown report)
        analysis_json_path = output_dir / "analysis.json"
        analysis.save(analysis_json_path)
        reports["analysis_json"] = analysis_json_path
        print(f"  Analysis JSON: {analysis_json_path}")

    # Save config so the run can be reproduced from the output directory
    config_path = output_dir / "config.json"
    with open(config_path, "w") as f:
        json.dump(config.to_dict(), f, indent=2)
    reports["config"] = config_path

    # Save run manifest: a single JSON index of everything this run produced
    manifest = {
        "run_id": run_id,
        "timestamp": timestamp.isoformat(),
        "mode": config.mode.value,
        "circuit_id": circuit_id,
        "n_observables": len(observable_set),
        "n_protocols": len(protocol_ids),
        "protocols": protocol_ids,
        "n_shots_grid": config.n_shots_grid,
        "n_replicates": config.n_replicates,
        "n_long_form_rows": len(long_form_rows),
        "has_ground_truth": truth_values is not None,
        "tasks_completed": list(task_results.keys()),
        "all_tasks_completed": list(all_task_results.keys()) if all_task_results else [],
        "has_analysis": analysis is not None,
        "reports": {k: str(v) for k, v in reports.items()},
    }
    manifest_path = output_dir / "manifest.json"
    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)
    reports["manifest"] = manifest_path

    print()
    print("=" * 70)
    print("BENCHMARK COMPLETE")
    print("=" * 70)
    print(f"Output directory: {output_dir}")
    print(f"Reports generated: {list(reports.keys())}")
    print()

    return BenchmarkSuiteResult(
        run_id=run_id,
        timestamp=timestamp,
        mode=config.mode,
        output_dir=output_dir,
        ground_truth=base_results["ground_truth"],
        long_form_results=long_form_rows,
        task_results=task_results,
        all_task_results=all_task_results,
        analysis=analysis,
        reports=reports,
        summary=summary,
    )

run_comprehensive_analysis(long_form_results, truth_values=None, epsilon=0.01, delta=0.05, locality_map=None, run_id='comprehensive_analysis', shadows_protocol_id='classical_shadows_v0', baseline_protocol_id='direct_grouped')

Run comprehensive benchmark analysis with all improvements.

Parameters:

Name Type Description Default
long_form_results list[LongFormRow]

Long-form benchmark results

required
truth_values dict[str, float] | None

Ground truth values for observables

None
epsilon float

Target precision

0.01
delta float

Failure probability

0.05
locality_map dict[str, int] | None

Map observable_id -> locality

None
run_id str

Analysis identifier

'comprehensive_analysis'
shadows_protocol_id str

ID of shadows protocol for comparison

'classical_shadows_v0'
baseline_protocol_id str

ID of baseline protocol for comparison

'direct_grouped'

Returns:

Type Description
ComprehensiveBenchmarkAnalysis

ComprehensiveBenchmarkAnalysis with all results

Source code in src/quartumse/analysis/comprehensive.py
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
def run_comprehensive_analysis(
    long_form_results: list[LongFormRow],
    truth_values: dict[str, float] | None = None,
    epsilon: float = 0.01,
    delta: float = 0.05,
    locality_map: dict[str, int] | None = None,
    run_id: str = "comprehensive_analysis",
    shadows_protocol_id: str = "classical_shadows_v0",
    baseline_protocol_id: str = "direct_grouped",
) -> ComprehensiveBenchmarkAnalysis:
    """Run comprehensive benchmark analysis with all improvements.

    Runs Tasks 1-6 plus per-observable crossover, locality, statistical,
    cost-normalized, pilot, and N*-interpolation analyses over the supplied
    long-form rows, and assembles everything into one result object.

    Args:
        long_form_results: Long-form benchmark results
        truth_values: Ground truth values for observables
        epsilon: Target precision
        delta: Failure probability
        locality_map: Map observable_id -> locality
        run_id: Analysis identifier
        shadows_protocol_id: ID of shadows protocol for comparison
        baseline_protocol_id: ID of baseline protocol for comparison

    Returns:
        ComprehensiveBenchmarkAnalysis with all results

    Raises:
        ValueError: If ``long_form_results`` is empty.
    """
    if not long_form_results:
        raise ValueError("No results provided")

    # Extract metadata (set comprehensions dedupe; protocol order is unspecified)
    protocols = list({r.protocol_id for r in long_form_results})
    n_shots_grid = sorted({r.N_total for r in long_form_results})
    n_observables = len({r.observable_id for r in long_form_results})

    # Separate results by protocol
    by_protocol: dict[str, list[LongFormRow]] = {}
    for row in long_form_results:
        if row.protocol_id not in by_protocol:
            by_protocol[row.protocol_id] = []
        by_protocol[row.protocol_id].append(row)

    # Accumulates one TaskAnalysis per task id, keyed by task id.
    task_analyses = {}

    # =========================================================================
    # Task 1: Worst-Case with interpolation
    # =========================================================================
    task1_config = TaskConfig(
        task_id="task1_worst_case",
        task_type=TaskType.WORST_CASE,
        epsilon=epsilon,
        delta=delta,
        n_grid=n_shots_grid,
        # Use truth-based criterion when ground truth is available, CI-based otherwise.
        criterion_type=CriterionType.TRUTH_BASED if truth_values else CriterionType.CI_BASED,
    )
    task1 = WorstCaseTask(task1_config)

    task1_results = {}
    for protocol_id, rows in by_protocol.items():
        output = task1.evaluate(rows, truth_values)
        task1_results[protocol_id] = output.to_task_result(run_id)

    # Enhanced: 95th percentile N*
    percentile_results = {}
    for protocol_id, rows in by_protocol.items():
        # Group standard errors by shot budget before taking percentiles.
        by_n: dict[int, list[float]] = {}
        for row in rows:
            if row.N_total not in by_n:
                by_n[row.N_total] = []
            by_n[row.N_total].append(row.se)
        n_star_95, pct_by_n = compute_percentile_n_star(by_n, epsilon, percentile=95)
        percentile_results[protocol_id] = {"n_star_95th": n_star_95, "percentiles": pct_by_n}

    task_analyses["task1_worst_case"] = TaskAnalysis(
        task_id="task1_worst_case",
        task_type="worst_case",
        base_results={p: {"n_star": r.N_star} for p, r in task1_results.items()},
        enhanced_results={"percentile_95_n_star": percentile_results},
    )

    # =========================================================================
    # Task 2: Average Target
    # =========================================================================
    task2_config = TaskConfig(
        task_id="task2_average",
        task_type=TaskType.AVERAGE_TARGET,
        epsilon=epsilon,
        delta=delta,
        n_grid=n_shots_grid,
        criterion_type=CriterionType.TRUTH_BASED if truth_values else CriterionType.CI_BASED,
    )
    task2 = AverageTargetTask(task2_config)

    task2_results = {}
    for protocol_id, rows in by_protocol.items():
        output = task2.evaluate(rows, truth_values)
        task2_results[protocol_id] = output.metrics

    task_analyses["task2_average"] = TaskAnalysis(
        task_id="task2_average",
        task_type="average_target",
        base_results=task2_results,
    )

    # =========================================================================
    # Task 3: Fixed Budget Distribution
    # =========================================================================
    task3_config = TaskConfig(
        task_id="task3_distribution",
        task_type=TaskType.FIXED_BUDGET,
        epsilon=epsilon,
        n_grid=n_shots_grid,
    )
    task3 = FixedBudgetDistributionTask(task3_config)

    task3_results = {}
    for protocol_id, rows in by_protocol.items():
        output = task3.evaluate(rows, truth_values)
        # Empty dict fallback when the task reports no SE distributions.
        task3_results[protocol_id] = output.details.get("se_distributions", {})

    task_analyses["task3_distribution"] = TaskAnalysis(
        task_id="task3_distribution",
        task_type="fixed_budget",
        base_results={"distributions": task3_results},
    )

    # =========================================================================
    # Task 4: Dominance with enhanced crossover
    # =========================================================================
    task4_config = TaskConfig(
        task_id="task4_dominance",
        task_type=TaskType.DOMINANCE,
        epsilon=epsilon,
        delta=delta,
        n_grid=n_shots_grid,
        criterion_type=CriterionType.TRUTH_BASED if truth_values else CriterionType.CI_BASED,
    )
    task4 = DominanceTask(task4_config)

    # Dominance is only meaningful when both named protocols are present.
    dominance_results = {}
    if shadows_protocol_id in by_protocol and baseline_protocol_id in by_protocol:
        dominance_results = task4.compare_protocols(
            by_protocol[shadows_protocol_id],
            by_protocol[baseline_protocol_id],
            truth_values,
            metric="mean_se",
        )

    task_analyses["task4_dominance"] = TaskAnalysis(
        task_id="task4_dominance",
        task_type="dominance",
        base_results=dominance_results,
    )

    # =========================================================================
    # Task 5: Pilot Selection
    # =========================================================================
    task5_config = TaskConfig(
        task_id="task5_pilot",
        task_type=TaskType.PILOT_SELECTION,
        epsilon=epsilon,
        delta=delta,
        n_grid=n_shots_grid,
        criterion_type=CriterionType.TRUTH_BASED if truth_values else CriterionType.CI_BASED,
        # Smallest grid point acts as the pilot budget, largest as the target.
        additional_params={"pilot_n": n_shots_grid[0], "target_n": n_shots_grid[-1]},
    )
    task5 = PilotSelectionTask(task5_config)
    task5_output = task5.evaluate(long_form_results, truth_values)

    task_analyses["task5_pilot"] = TaskAnalysis(
        task_id="task5_pilot",
        task_type="pilot_selection",
        base_results=task5_output.metrics,
    )

    # =========================================================================
    # Task 6: Bias-Variance
    # =========================================================================
    # Bias decomposition needs ground truth, so this task is skipped without it.
    if truth_values:
        task6_config = TaskConfig(
            task_id="task6_biasvar",
            task_type=TaskType.BIAS_VARIANCE,
            n_grid=n_shots_grid,
        )
        task6 = BiasVarianceTask(task6_config)

        task6_results = {}
        for protocol_id, rows in by_protocol.items():
            output = task6.evaluate(rows, truth_values)
            task6_results[protocol_id] = output.metrics

        task_analyses["task6_biasvar"] = TaskAnalysis(
            task_id="task6_biasvar",
            task_type="bias_variance",
            base_results=task6_results,
        )

    # =========================================================================
    # Per-observable crossover analysis
    # =========================================================================
    crossover_analysis = None
    if shadows_protocol_id in by_protocol and baseline_protocol_id in by_protocol:
        crossover_analysis = per_observable_crossover(
            by_protocol[shadows_protocol_id],
            by_protocol[baseline_protocol_id],
            metric="se",
            interpolate=True,
        )

    # =========================================================================
    # Locality analysis
    # =========================================================================
    # Evaluated at the largest shot budget only.
    locality_analysis = analyze_by_locality(
        long_form_results, n_total=n_shots_grid[-1], locality_map=locality_map
    )

    # =========================================================================
    # Statistical comparison
    # =========================================================================
    statistical_comparison = {}
    if shadows_protocol_id in by_protocol and baseline_protocol_id in by_protocol:
        for n in n_shots_grid:
            try:
                comparison = compare_protocols_statistically(
                    by_protocol[shadows_protocol_id],
                    by_protocol[baseline_protocol_id],
                    n_total=n,
                    epsilon=epsilon,
                    n_bootstrap=5000,
                )
                statistical_comparison[n] = comparison
            except Exception:
                # Best-effort: silently skip shot budgets where the bootstrap
                # comparison fails (presumably too few replicates at that N —
                # TODO(review): consider logging the failure instead).
                pass

    # =========================================================================
    # Cost-normalized analysis
    # =========================================================================
    # Default cost model; comparison evaluated at the largest shot budget.
    cost_model = CostModel()
    cost_results = compute_cost_normalized_metrics(long_form_results, cost_model, truth_values)
    cost_comparison = compare_cost_normalized(cost_results, n_shots_grid[-1])
    cost_analysis = {
        "cost_model": cost_model.to_dict(),
        "comparison_at_max_n": cost_comparison,
    }

    # =========================================================================
    # Multi-pilot analysis
    # =========================================================================
    pilot_analysis = multi_pilot_analysis(
        long_form_results,
        target_n=n_shots_grid[-1],
        pilot_fractions=[0.02, 0.05, 0.10, 0.20],
    )

    # Also compute interpolated pilot analysis (avoids grid degeneracy)
    pilot_analysis_interp = interpolated_pilot_analysis(
        long_form_results,
        target_n=n_shots_grid[-1],
        pilot_fractions=[0.02, 0.05, 0.10, 0.20],
    )

    # =========================================================================
    # N* interpolation
    # =========================================================================
    interpolated_n_star = {}
    for protocol_id, rows in by_protocol.items():
        # Compute mean SE at each N
        by_n: dict[int, list[float]] = {}
        for row in rows:
            if row.N_total not in by_n:
                by_n[row.N_total] = []
            by_n[row.N_total].append(row.se)

        ns = sorted(by_n.keys())
        mean_ses = [np.mean(by_n[n]) for n in ns]

        # Power-law fit of SE vs N; `fit` may be None when the fit fails.
        n_star, fit = interpolate_n_star(ns, mean_ses, epsilon, method="power_law")

        interpolated_n_star[protocol_id] = {
            "n_star_interpolated": n_star,
            "amplitude": fit.amplitude if fit else None,
            "exponent": fit.exponent if fit else None,
            "r_squared": fit.r_squared if fit else None,
        }

    # =========================================================================
    # Executive summary
    # =========================================================================
    summary = {
        "n_protocols": len(protocols),
        "n_observables": n_observables,
        "n_shots_evaluated": len(n_shots_grid),
        "max_shots": n_shots_grid[-1],
    }

    # Add key findings
    if shadows_protocol_id in by_protocol and baseline_protocol_id in by_protocol:
        # Mean SE per protocol at the largest shot budget; ratio < 1 means
        # shadows achieved a lower mean SE than the baseline.
        shadows_se = np.mean(
            [r.se for r in by_protocol[shadows_protocol_id] if r.N_total == n_shots_grid[-1]]
        )
        baseline_se = np.mean(
            [r.se for r in by_protocol[baseline_protocol_id] if r.N_total == n_shots_grid[-1]]
        )
        summary["shadows_mean_se"] = float(shadows_se)
        summary["baseline_mean_se"] = float(baseline_se)
        summary["shadows_vs_baseline_ratio"] = (
            float(shadows_se / baseline_se) if baseline_se > 0 else float("inf")
        )
        summary["winner_at_max_n"] = (
            shadows_protocol_id if shadows_se < baseline_se else baseline_protocol_id
        )

    if crossover_analysis:
        summary["shadows_wins_fraction"] = crossover_analysis.summary.get("a_win_fraction", 0)
        summary["baseline_wins_fraction"] = crossover_analysis.summary.get("b_win_fraction", 0)

    if pilot_analysis and pilot_analysis.optimal_fraction:
        summary["optimal_pilot_fraction"] = pilot_analysis.optimal_fraction

    return ComprehensiveBenchmarkAnalysis(
        run_id=run_id,
        protocols=protocols,
        n_observables=n_observables,
        n_shots_grid=n_shots_grid,
        task_analyses=task_analyses,
        crossover_analysis=crossover_analysis,
        locality_analysis={p: a.to_dict() for p, a in locality_analysis.items()},
        statistical_comparison=statistical_comparison,
        cost_analysis=cost_analysis,
        pilot_analysis=pilot_analysis,
        pilot_analysis_interpolated=pilot_analysis_interp,
        interpolated_n_star=interpolated_n_star,
        summary=summary,
    )

run_posthoc_benchmark_from_suite(posthoc_suite, n_rounds=5, observables_per_round=None, shadows_shots=1000, direct_shots_per_basis=100, seed=42)

Run post-hoc benchmark using a posthoc library suite.

Parameters:

Name Type Description Default
posthoc_suite ObservableSuite

ObservableSuite with POSTHOC type

required
n_rounds int

Number of query rounds

5
observables_per_round int | None

Observables per round (default: library_size // n_rounds)

None
shadows_shots int

Total shots for shadows acquisition

1000
direct_shots_per_basis int

Shots per measurement basis for direct

100
seed int

Random seed

42

Returns:

Type Description
PosthocBenchmarkResult

PosthocBenchmarkResult

Source code in src/quartumse/analysis/posthoc_benchmark.py
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
def run_posthoc_benchmark_from_suite(
    posthoc_suite: ObservableSuite,
    n_rounds: int = 5,
    observables_per_round: int | None = None,
    shadows_shots: int = 1000,
    direct_shots_per_basis: int = 100,
    seed: int = 42,
) -> PosthocBenchmarkResult:
    """Run post-hoc benchmark using a posthoc library suite.

    Builds random query rounds over the suite's observable library, derives a
    (conservative) observable-to-basis mapping, and hands everything to the
    post-hoc benchmark simulator.

    Args:
        posthoc_suite: ObservableSuite with POSTHOC type
        n_rounds: Number of query rounds
        observables_per_round: Observables per round (default: library_size // n_rounds)
        shadows_shots: Total shots for shadows acquisition
        direct_shots_per_basis: Shots per measurement basis for direct
        seed: Random seed

    Returns:
        PosthocBenchmarkResult
    """
    library_ids = [obs.observable_id for obs in posthoc_suite.observables]

    # Spread the library evenly across rounds unless the caller chose a size.
    if observables_per_round is None:
        observables_per_round = max(1, len(library_ids) // n_rounds)

    query_rounds = generate_query_rounds(
        library_observable_ids=library_ids,
        n_rounds=n_rounds,
        observables_per_round=observables_per_round,
        strategy="random",
        seed=seed,
    )

    # Map each observable to a measurement basis and build the inverse map.
    # Conservative simplification: the full Pauli string serves as its own
    # basis key, so only identical strings share a basis; real grouping
    # strategies are more sophisticated.
    observable_to_basis: dict = {}
    basis_to_observables: dict = {}
    for obs in posthoc_suite.observables:
        basis = obs.pauli_string
        observable_to_basis[obs.observable_id] = basis
        basis_to_observables.setdefault(basis, set()).add(obs.observable_id)

    return simulate_posthoc_benchmark(
        library_observable_ids=library_ids,
        query_rounds=query_rounds,
        shadows_shots_per_acquisition=shadows_shots,
        direct_shots_per_basis=direct_shots_per_basis,
        observable_to_basis=observable_to_basis,
        basis_to_observables=basis_to_observables,
    )

sample_random_paulis(n_qubits, n_samples, strategy='stratified', max_weight=None, seed=42)

Sample random Pauli observables using various strategies.

Strategies

'stratified': Equal samples per weight class (best for benchmarking) 'uniform': Each qubit i.i.d. I/X/Y/Z (biases toward middle weights) 'importance': Weight k sampled with prob ∝ 1/3^k (matches shadows variance) 'uniform_weight': Uniform over weight classes, then uniform within

Parameters:

Name Type Description Default
n_qubits int

Number of qubits

required
n_samples int

Number of Paulis to sample

required
strategy Literal['stratified', 'uniform', 'importance', 'uniform_weight']

Sampling strategy

'stratified'
max_weight int | None

Maximum Pauli weight (default: n_qubits)

None
seed int

Random seed

42

Returns:

Type Description
list[str]

List of sampled Pauli strings

Source code in src/quartumse/observables/suites.py
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
def sample_random_paulis(
    n_qubits: int,
    n_samples: int,
    strategy: Literal["stratified", "uniform", "importance", "uniform_weight"] = "stratified",
    max_weight: int | None = None,
    seed: int = 42,
) -> list[str]:
    """Sample random Pauli observables using various strategies.

    Strategies:
        'stratified': Equal samples per weight class (best for benchmarking)
        'uniform': Each qubit i.i.d. I/X/Y/Z (biases toward middle weights)
        'importance': Weight k sampled with prob ∝ 1/3^k (matches shadows variance)
        'uniform_weight': Uniform over weight classes, then uniform within

    Args:
        n_qubits: Number of qubits
        n_samples: Number of Paulis to sample
        strategy: Sampling strategy
        max_weight: Maximum Pauli weight (default: n_qubits)
        seed: Random seed

    Returns:
        List of sampled Pauli strings
    """
    rng = np.random.default_rng(seed)
    if max_weight is None:
        max_weight = n_qubits

    sampled = set()
    pauli_ops = ["X", "Y", "Z"]

    if strategy == "stratified":
        samples_per_k = max(1, n_samples // max_weight)
        remainder = n_samples - samples_per_k * max_weight

        for k in range(1, max_weight + 1):
            n_at_k = samples_per_k + (1 if k <= remainder else 0)
            max_possible = comb(n_qubits, k) * (3**k)

            if n_at_k >= max_possible:
                sampled.update(generate_all_k_local(n_qubits, k))
            else:
                count = 0
                attempts = 0
                while count < n_at_k and attempts < n_at_k * 100:
                    positions = tuple(sorted(rng.choice(n_qubits, k, replace=False)))
                    ops = tuple(rng.choice(pauli_ops, k))
                    pauli_list = ["I"] * n_qubits
                    for pos, op in zip(positions, ops, strict=False):
                        pauli_list[pos] = op
                    pauli = "".join(pauli_list)
                    if pauli not in sampled:
                        sampled.add(pauli)
                        count += 1
                    attempts += 1

    elif strategy == "uniform":
        attempts = 0
        while len(sampled) < n_samples and attempts < n_samples * 100:
            pauli_list = rng.choice(["I", "X", "Y", "Z"], n_qubits)
            pauli = "".join(pauli_list)
            weight = sum(1 for c in pauli if c != "I")
            if 0 < weight <= max_weight and pauli not in sampled:
                sampled.add(pauli)
            attempts += 1

    elif strategy == "importance":
        weights = np.array([1.0 / (3**k) for k in range(1, max_weight + 1)])
        weights /= weights.sum()

        attempts = 0
        while len(sampled) < n_samples and attempts < n_samples * 100:
            k = rng.choice(range(1, max_weight + 1), p=weights)
            positions = tuple(sorted(rng.choice(n_qubits, k, replace=False)))
            ops = tuple(rng.choice(pauli_ops, k))
            pauli_list = ["I"] * n_qubits
            for pos, op in zip(positions, ops, strict=False):
                pauli_list[pos] = op
            pauli = "".join(pauli_list)
            if pauli not in sampled:
                sampled.add(pauli)
            attempts += 1

    elif strategy == "uniform_weight":
        attempts = 0
        while len(sampled) < n_samples and attempts < n_samples * 100:
            k = rng.integers(1, max_weight + 1)
            positions = tuple(sorted(rng.choice(n_qubits, k, replace=False)))
            ops = tuple(rng.choice(pauli_ops, k))
            pauli_list = ["I"] * n_qubits
            for pos, op in zip(positions, ops, strict=False):
                pauli_list[pos] = op
            pauli = "".join(pauli_list)
            if pauli not in sampled:
                sampled.add(pauli)
            attempts += 1

    else:
        raise ValueError(f"Unknown sampling strategy: {strategy}")

    return list(sampled)

Commands

See the CLI reference for command-line usage details.