# Copyright 2022 MTS (Mobile Telesystems)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Experiment design methods.
Module contains `Designer` core class and `design` method which are
intended to conduct the experiment design for A/B/.. tests via different
methods.
Experiment design of the individual metric is based on its historical data
and could be done for any parameter from the self-dependent triplet:
group size, effect size and experiment power.
Currently, experiment design problem could be solved using data provided
in the form of either pandas or Spark (with some restrictions) dataframes.
"""
from __future__ import annotations
from typing import List, Optional
import numpy as np
import pandas as pd
import yaml
import ambrosia.tools.bin_intervals as bin_pkg
import ambrosia.tools.theoretical_tools as theory_pkg
from ambrosia import types
from ambrosia.tools.ab_abstract_component import ABMetaClass, ABToolAbstract, SimpleDesigner
from .handlers import EmpiricHandler, TheoryHandler, calc_prob_control_class
# Names of the experiment parameters that can be designed; they are compared
# against the ``to_design`` argument of ``Designer.run`` and ``design_binary``.
SIZE: str = "size"
EFFECT: str = "effect"
POWER: str = "power"
# Methods listed in the error message raised by the ``design_binary_*`` functions
# when an unsupported ``method`` is passed.
BINARY_DESIGN_METHODS: List[str] = ["theory", "binary"]
[docs]
class Designer(yaml.YAMLObject, ABToolAbstract, metaclass=ABMetaClass):
    """
    Unit for experiments and pilots parameters design.

    Enables to design missing experiment parameters using historical data.
    The main related to each other designable parameters for a single metric are:

    - Effect (Minimal Detectable Effect):
        old_mean_metric_value * effect_value = new_mean_metric_value
    - Sample size:
        Number of research objects in sample
        (for example number of users and their retention).
    - Errors (I type error, II type error):
        I error (alpha):
            Probability to detect presence of effect
            for equally distributed samples.
        II error (beta):
            Probability not to find effect
            for differently distributed samples.

    Parameters
    ----------
    dataframe : PassedDataType, optional
        DataFrame with metrics historical values.
        A string ending with ``.csv`` is read with ``pandas.read_csv``.
    sizes : SampleSizeType, optional
        Values of research objects number in groups samples during
        the experiment.
    effects : EffectType, optional
        Effects values that are expected during the experiment.
    first_type_errors : StatErrorType, default: ``0.05``
        I type error bounds
        P (detect difference for equal) < alpha.
    second_type_errors : StatErrorType, default: ``0.2``
        II type error bounds
        P (suppose equality for different groups) < beta.
    metrics : MetricNamesType, optional
        Column names of metrics in dataframe to be designed.
    method : str, default: ``"theory"``
        Method used for experiment design.
        Can be ``"theory"``, ``"empiric"`` or ``"binary"``.

    Attributes
    ----------
    dataframe : PassedDataType
        DataFrame with metrics historical values.
    sizes : SampleSizeType
        Number of research objects in group samples.
    effects : EffectType
        Effects values in the experiment.
    first_type_errors : StatErrorType, default: ``0.05``
        I type errors.
    second_type_errors : StatErrorType, default: ``0.2``
        II type errors.
    metrics : MetricNamesType
        Column names of metrics in dataframe to be designed.
    method : str
        Method used for experiment design.

    Examples
    --------
    We have retention labels for users of mobile app for previous month.
    Suppose old_retention = ``0.3``, that is 30% of users returned to the app
    in a month after installation.

    Let us fix the following parameters:
        I type error (alpha) = ``0.05``
        (5% of equal samples we can suppose to be different).
        II type error (beta) = ``0.2``
        (20% of different samples we can suppose to be equal).

    We add onboarding to our app and want to estimate an effect by A/B testing
    and wish to increase retention value to 31%, so our effect
    parameter gets value of ``1.0(3)``. Now we want to find how many users we
    need in both groups to detect such effect.

    We can use ``Designer`` class in the following way:

    >>> designer = Designer(dataframe=df, metrics='retention', effects=1.033)
    >>> designer.run("size")

    Note, that default values for errors are:
        ``first_type_errors`` = ``0.05``
        ``second_type_errors`` = ``0.2``

    Then we get dataframe that contains value of sufficient number of users
    for our experiment.

    Notes
    -----
    Constructors:

    >>> designer = Designer()
    >>> # You can pass an Iterable or single object for some parameters
    >>> designer = Designer(
    >>>     dataframe=df,
    >>>     sizes=[100, 200],
    >>>     metrics='LTV',
    >>>     effects=1.05
    >>> )
    >>> designer = Designer(sizes=1000, metrics=['retention', 'LTV'])
    >>> # You can use path to .csv table for pandas
    >>> designer = Designer('./data/table.csv')

    Setters:

    >>> designer.set_first_errors([0.05, 0.01])
    >>> designer.set_dataframe(df)

    Run:

    >>> # One can pass arguments and they will have higher priority
    >>> designer.run('size', effects=1.1)
    >>> designer.run('effect', sizes=[500, 1000], metrics='retention')
    >>> # You can set method (watch below)
    >>> designer.run('effect', sizes=[500, 1000], metrics='retention', method='binary')

    Load from yaml config:

    >>> config = '''
    !designer # <--- this is yaml tag (!important)
    effects:
        - 0.9
        - 1.05
    sizes:
        - 1000
    '''
    >>> designer = yaml.load(config, Loader=yaml.Loader)
    >>> # Or use the implemented function
    >>> designer = load_from_config(config)

    Use standalone function instead of a class:

    >>> design('size', dataframe=df, effects=1.05, metrics='retention')
    """

    # YAML tag for loading from configs
    yaml_tag = "!designer"

    @staticmethod
    def __wrap_scalar(value, scalar_types):
        """
        Return ``[value]`` if ``value`` is an instance of ``scalar_types``,
        otherwise return ``value`` unchanged (including ``None``).

        Shared by the setters and ``run`` so that both normalise single
        values into one-element lists in exactly the same way.
        """
        return [value] if isinstance(value, scalar_types) else value

    def set_first_errors(self, first_type_errors: types.StatErrorType) -> None:
        """Store I type error value(s); a single float becomes a one-element list."""
        self.__alpha = Designer.__wrap_scalar(first_type_errors, float)

    def set_second_errors(self, second_type_errors: types.StatErrorType) -> None:
        """Store II type error value(s); a single float becomes a one-element list."""
        self.__beta = Designer.__wrap_scalar(second_type_errors, float)

    def set_sizes(self, sizes: types.SampleSizeType) -> None:
        """Store group size(s); a single int becomes a one-element list."""
        self.__size = Designer.__wrap_scalar(sizes, int)

    def set_effects(self, effects: types.EffectType) -> None:
        """Store effect value(s); a single float or int becomes a one-element list."""
        self.__effect = Designer.__wrap_scalar(effects, (float, int))

    def set_dataframe(self, dataframe: types.PassedDataType) -> None:
        """
        Store the historical data.

        A string is treated as a path and must end with ``.csv``; it is read
        with ``pandas.read_csv``. Any other object is stored as is.

        Raises
        ------
        ValueError
            If a string that does not end with ``.csv`` is passed.
        """
        if isinstance(dataframe, str):
            if dataframe.endswith(".csv"):
                self.__df = pd.read_csv(dataframe)
            else:
                raise ValueError("File name must ends with .csv")
        else:
            self.__df = dataframe

    def set_method(self, method: str) -> None:
        """Store the design method name (validated later, when ``run`` is called)."""
        self.__method = method

    def set_metrics(self, metrics: types.MetricNamesType) -> None:
        """Store metric name(s); a single ``MetricNameType`` value becomes a one-element list."""
        self.__metrics = Designer.__wrap_scalar(metrics, types.MetricNameType)

    def __init__(
        self,
        dataframe: Optional[types.PassedDataType] = None,
        sizes: Optional[types.SampleSizeType] = None,
        effects: Optional[types.EffectType] = None,
        first_type_errors: types.StatErrorType = 0.05,
        second_type_errors: types.StatErrorType = 0.2,
        metrics: Optional[types.MetricNamesType] = None,
        method: str = "theory",
    ):
        """
        Designer class constructor to initialize the object.

        Every argument is routed through its setter, so scalar values are
        stored as one-element lists.
        """
        self.set_first_errors(first_type_errors)
        self.set_second_errors(second_type_errors)
        self.set_sizes(sizes)
        self.set_effects(effects)
        self.set_metrics(metrics)
        self.set_dataframe(dataframe)
        self.set_method(method)

    def __getstate__(self):
        """
        Get the state of the object to serialize.

        The keys match the ``__init__`` keyword names, so ``from_yaml`` can
        feed them straight back into the constructor. The dataframe is not
        part of the serialized state.
        """
        return dict(
            effects=self.__effect,
            sizes=self.__size,
            first_type_errors=self.__alpha,
            second_type_errors=self.__beta,
            metrics=self.__metrics,
            method=self.__method,
        )

    @classmethod
    def from_yaml(cls, loader: yaml.Loader, node: yaml.Node):
        """
        Build a ``Designer`` from a YAML mapping node tagged with ``yaml_tag``.

        The mapping keys are passed to ``__init__`` as keyword arguments.
        """
        # deep=True builds nested sequences eagerly, so the constructor receives
        # fully populated lists instead of containers that PyYAML only fills in
        # after the constructor has returned.
        kwargs = loader.construct_mapping(node, deep=True)
        return cls(**kwargs)

    @staticmethod
    def __dataframe_handler(handler: SimpleDesigner, parameter: str, **kwargs) -> pd.DataFrame:
        """
        Call the handler method that corresponds to the designed parameter.

        ``"size"`` -> ``handler.size_design``, ``"effect"`` ->
        ``handler.effect_design``, ``"power"`` -> ``handler.power_design``;
        ``kwargs`` are forwarded unchanged.

        Raises
        ------
        ValueError
            If ``parameter`` is not one of the designable parameters.
        """
        if parameter == SIZE:
            return handler.size_design(**kwargs)
        elif parameter == EFFECT:
            return handler.effect_design(**kwargs)
        elif parameter == POWER:
            return handler.power_design(**kwargs)
        else:
            raise ValueError(f"Only {SIZE}, {EFFECT} and {POWER} parameters of the experiment could be designed.")

    @staticmethod
    def __theory_design(label: str, args: types._UsageArgumentsType, **kwargs) -> types.DesignerResult:
        """
        Designing an experiment, using a theoretical approach.

        Runs ``TheoryHandler`` once per metric. Returns the single table when
        exactly one metric is given, otherwise a dict keyed by metric name.
        """
        result: types.DesignerResult = {}
        for metric_name in args["metric"]:
            kwargs["dataframe"] = args["df"]
            kwargs["column"] = metric_name
            kwargs["first_errors"] = np.array(args["alpha"])
            # Only the two known parameters of the triplet are forwarded;
            # the third one is what the handler computes.
            if label == SIZE:
                kwargs["effects"] = args[EFFECT]
                kwargs["second_errors"] = np.array(args["beta"])
            elif label == EFFECT:
                kwargs["sample_sizes"] = args[SIZE]
                kwargs["second_errors"] = np.array(args["beta"])
            elif label == POWER:
                kwargs["sample_sizes"] = args[SIZE]
                kwargs["effects"] = args[EFFECT]
            result[metric_name] = Designer.__dataframe_handler(TheoryHandler(), label, **kwargs)
        if len(args["metric"]) == 1:
            return result[args["metric"][0]]
        else:
            return result

    @staticmethod
    def __empiric_design(label: str, args: types._UsageArgumentsType, **kwargs) -> types.DesignerResult:
        """
        Designing an experiment, using an empirical approach.

        All metrics are passed to ``EmpiricHandler`` in a single call.
        """
        kwargs["dataframe"] = args["df"]
        kwargs["alphas"] = np.array(args["alpha"])
        kwargs["metrics"] = args["metric"]
        if label == SIZE:
            kwargs["effects"] = args[EFFECT]
            kwargs["betas"] = np.array(args["beta"])
        elif label == EFFECT:
            kwargs["group_sizes"] = args[SIZE]
            kwargs["betas"] = np.array(args["beta"])
        elif label == POWER:
            # groups_ratio is consumed here and turned into explicit sizes of
            # the second group; it is not forwarded to the handler.
            groups_ratio: float = kwargs.pop("groups_ratio", 1.0)
            kwargs["sample_sizes_a"] = args[SIZE]
            kwargs["sample_sizes_b"] = [int(groups_ratio * size) for size in args[SIZE]]
            kwargs["effects"] = args[EFFECT]
        return Designer.__dataframe_handler(EmpiricHandler(), label, **kwargs)

    @staticmethod
    def __binary_design(label: str, args: types._UsageArgumentsType, **kwargs) -> types.DesignerResult:
        """
        Designing an experiment, using the approach for binary metrics.

        For every metric the control-group success probability is obtained
        with ``calc_prob_control_class`` and passed as ``p_a`` to the matching
        ``bin_intervals`` table function. Returns the single table when
        exactly one metric is given, otherwise a dict keyed by metric name.
        """
        result: types.DesignerResult = {}
        kwargs["first_errors"] = np.array(args["alpha"])
        for metric_name in args["metric"]:
            kwargs["p_a"] = calc_prob_control_class(args["df"], metric_name)
            if label == SIZE:
                kwargs["delta_relative_values"] = args[EFFECT]
                kwargs["second_errors"] = args["beta"]
                result[metric_name] = bin_pkg.get_table_sample_size_on_effect(**kwargs)
            elif label == EFFECT:
                kwargs["second_errors"] = args["beta"]
                kwargs["sample_sizes"] = args[SIZE]
                result[metric_name] = bin_pkg.get_table_effect_on_sample_size(**kwargs)
            elif label == POWER:
                kwargs["delta_relative_values"] = args[EFFECT]
                kwargs["sample_sizes"] = args[SIZE]
                result[metric_name] = bin_pkg.get_table_power_on_size_and_delta(**kwargs)
        if len(args["metric"]) == 1:
            return result[args["metric"][0]]
        else:
            return result

    @staticmethod
    def __pre_design(label: str, args: types._UsageArgumentsType, **kwargs) -> types.DesignerResult:
        """
        Helper function for run() method logic.

        Dispatches to the design routine selected by ``args["method"]``.

        Raises
        ------
        ValueError
            If the method is not ``"theory"``, ``"empiric"`` or ``"binary"``.
        """
        admissible_methods: List[str] = ["theory", "empiric", "binary"]
        if args["method"] == "theory":
            return Designer.__theory_design(label, args, **kwargs)
        elif args["method"] == "empiric":
            return Designer.__empiric_design(label, args, **kwargs)
        elif args["method"] == "binary":
            return Designer.__binary_design(label, args, **kwargs)
        else:
            raise ValueError(f'Choose method from {", ".join(admissible_methods)}, got {args["method"]}')

    def run(
        self,
        to_design: str,
        method: Optional[str] = None,
        sizes: Optional[types.SampleSizeType] = None,
        effects: Optional[types.EffectType] = None,
        first_type_errors: Optional[types.StatErrorType] = None,
        second_type_errors: Optional[types.StatErrorType] = None,
        dataframe: Optional[types.PassedDataType] = None,
        metrics: Optional[types.MetricNamesType] = None,
        **kwargs,
    ) -> types.DesignerResult:
        """
        Perform an experiment design for chosen parameter and metrics
        using historical data.

        Parameters
        ----------
        to_design : str
            Parameter that will be designed using historical data.
            Can take the values of ``"size"``, ``"effect"`` or ``"power"``.
        method : str, optional
            Method used for experiment design.
            Can be ``"theory"``, ``"empiric"`` or ``"binary"``.
        sizes : SampleSizeType, optional
            Values of research objects number in groups samples during
            the experiment.
            If is not provided, must exist as proper class attribute.
        effects : EffectType, optional
            Effects for experiment
            If is not provided, must exist as proper class attribute.
        first_type_errors : StatErrorType, optional
            I type error bounds
            P (detect difference for equal) < alpha.
        second_type_errors : StatErrorType, optional
            II type error bounds
            P (suppose equality for different groups) < beta.
        dataframe : PassedDataType, optional
            DataFrame with metrics historical values.
            If is not provided, must exist as proper class attribute.
        metrics : MetricNamesType, optional
            Column names of metrics in dataframe to be designed.
            If not provided, must exist as proper class attribute.
        **kwargs : Dict
            Other keyword arguments.

        Other Parameters
        ----------------
        as_numeric : bool, default: ``False``
            The result of calculations can be obtained as a percentage string
            either as a number, this parameter could used to toggle.
        groups_ratio : float, default: ``1.0``
            Ratio between two groups.
        alternative : str, default: ``"two-sided"``
            Alternative hypothesis, can be ``"two-sided"``, ``"greater"``
            or ``"less"``.
            ``"greater"`` - if effect is positive.
            ``"less"`` - if effect is negative.
        stabilizing_method : str, default: ``"asin"``
            Effect transformation. Can be ``"asin"`` and ``"norm"``.
            For non-binary metrics: only ``"norm"`` is acceptable.
            For binary metrics: ``"norm"`` and ``"asin"``, but ``"asin"``
            is more robust and accurate.
            Acceptable only for ``"theory"`` method and actual for binary metrics!

        Returns
        -------
        result : DesignerResult
            Table or dictionary with the results of parameter design for each
            metric.

        Raises
        ------
        ValueError
            If ``to_design`` is not ``"size"``, ``"effect"`` or ``"power"``,
            or if the chosen method is not supported.
        """
        # Same scalar -> one-element-list normalisation as the setters apply.
        effects = Designer.__wrap_scalar(effects, (float, int))
        sizes = Designer.__wrap_scalar(sizes, int)
        first_type_errors = Designer.__wrap_scalar(first_type_errors, float)
        second_type_errors = Designer.__wrap_scalar(second_type_errors, float)
        metrics = Designer.__wrap_scalar(metrics, types.MetricNameType)

        # Each entry pairs the stored attribute with the value passed to this
        # call. NOTE(review): ``_prepare_arguments`` is not defined in this
        # class (presumably inherited from ABToolAbstract); per the class
        # docstring, values passed to ``run`` take priority - confirm there.
        arguments_choice: types._PrepareArgumentsType = {
            "df": (self.__df, dataframe),
            "alpha": (self.__alpha, first_type_errors),
            "metric": (self.__metrics, metrics),
            "method": (self.__method, method),
        }
        designable_parameters: List[str] = [SIZE, EFFECT, POWER]

        # Only the two known parameters of the size/effect/power triplet are
        # required; the remaining one is the quantity being designed.
        if to_design == SIZE:
            arguments_choice[EFFECT] = (self.__effect, effects)
            arguments_choice["beta"] = (self.__beta, second_type_errors)
        elif to_design == EFFECT:
            arguments_choice[SIZE] = (self.__size, sizes)
            arguments_choice["beta"] = (self.__beta, second_type_errors)
        elif to_design == POWER:
            arguments_choice[SIZE] = (self.__size, sizes)
            arguments_choice[EFFECT] = (self.__effect, effects)
        else:
            raise ValueError(f'Incorrect parameter name to design, choose from {", ".join(designable_parameters)}')

        chosen_args: types._UsageArgumentsType = Designer._prepare_arguments(arguments_choice)
        return Designer.__pre_design(to_design, chosen_args, **kwargs)
[docs]
def load_from_config(yaml_config: str, loader: type = yaml.Loader) -> Designer:
    """
    Restore a ``Designer`` class instance from a yaml config.

    Parameters
    ----------
    yaml_config : str
        Either the YAML text itself or a path to a config file.
        A string ending with ``.yaml`` (for example ``"config.yaml"``)
        is treated as a path and opened with utf8 encoding.
    loader : type, default: ``yaml.Loader``
        PyYAML loader class passed to ``yaml.load``.
        NOTE(review): ``yaml.Loader`` is PyYAML's unrestricted loader;
        only feed it configs from trusted sources.

    Returns
    -------
    Designer
        Object produced by ``yaml.load`` for the given config.
    """
    looks_like_path = isinstance(yaml_config, str) and yaml_config.endswith(".yaml")
    if not looks_like_path:
        # Anything else is handed to PyYAML directly as the document content.
        return yaml.load(yaml_config, Loader=loader)
    with open(yaml_config, "r", encoding="utf8") as config_file:
        return yaml.load(config_file, Loader=loader)
[docs]
def design(
    to_design,
    dataframe: types.PassedDataType,
    metrics: types.MetricNamesType,
    sizes: types.SampleSizeType = None,
    effects: types.EffectType = None,
    first_type_errors: types.StatErrorType = (0.05,),
    second_type_errors: types.StatErrorType = (0.2,),
    method: str = "theory",
    **kwargs,
) -> types.DesignerResult:
    """
    Function wrapper around the ``Designer`` class.

    Builds a ``Designer`` from the passed arguments and immediately calls
    its ``run`` method for the requested parameter, forwarding ``kwargs``.

    Parameters
    ----------
    to_design : str
        Parameter that will be designed using historical data.
        Can take the values of ``"size"``, ``"effect"`` or ``"power"``.
    dataframe : PassedDataType
        DataFrame with metrics historical values.
    metrics : MetricNamesType
        Column names of metrics in dataframe to be designed.
    sizes : SampleSizeType, optional
        Values of research objects number in groups samples during
        the experiment.
        If is not provided, ``effects`` value must be defined.
    effects : EffectType, optional
        Effects for experiment
        If is not provided, ``sizes`` value must be defined.
    first_type_errors : StatErrorType, default: ``(0.05,)``
        I type error bounds
        P (detect difference for equal) < alpha.
    second_type_errors : StatErrorType, default: ``(0.2,)``
        II type error bounds
        P (suppose equality for different groups) < beta.
    method : str, default: ``"theory"``
        Method used for experiment design.
        Can be ``"theory"``, ``"empiric"`` or ``"binary"``.
    **kwargs : Dict
        Other keyword arguments, passed to ``Designer.run``.

    Other Parameters
    ----------------
    as_numeric : bool, default: ``False``
        The result of calculations can be obtained as a percentage string
        either as a number, this parameter could used to toggle.
    groups_ratio : float, default: ``1.0``
        Ratio between two groups.
    alternative : str, default: ``"two-sided"``
        Alternative hypothesis, can be ``"two-sided"``, ``"greater"``
        or ``"less"``.
        ``"greater"`` - if effect is positive.
        ``"less"`` - if effect is negative.
    stabilizing_method : str, default: ``"asin"``
        Effect transformation. Can be ``"asin"`` and ``"norm"``.
        For non-binary metrics: only ``"norm"`` is acceptable.
        For binary metrics: ``"norm"`` and ``"asin"``, but ``"asin"``
        is more robust and accurate.
        Acceptable only for ``"theory"`` method and actual for binary metrics!

    Returns
    -------
    result : DesignerResult
        Table or dictionary with the results of parameter design for each
        metric.
    """
    # Everything except ``to_design`` and ``kwargs`` configures the instance;
    # ``run`` is then called without overrides.
    designer_options = dict(
        dataframe=dataframe,
        metrics=metrics,
        first_type_errors=first_type_errors,
        second_type_errors=second_type_errors,
        sizes=sizes,
        effects=effects,
        method=method,
    )
    designer = Designer(**designer_options)
    return designer.run(to_design, **kwargs)
def design_binary_size(
    prob_a: float,
    effects: types.EffectType,
    first_type_errors: types.StatErrorType = (0.05,),
    second_type_errors: types.StatErrorType = (0.2,),
    method: str = "theory",
    groups_ratio: float = 1.0,
    alternative: str = "two-sided",
    stabilizing_method: str = "asin",
    **kwargs,
) -> pd.DataFrame:
    """
    Design size for binary metrics.

    Parameters
    ----------
    prob_a : float
        Probability of success for the control group.
    effects : EffectType
        List or single value of relative effects.
        For example: ``1.05``, ``[1.05, 1.2]``.
    first_type_errors : StatErrorType, default: ``(0.05,)``
        I type error bounds
        P (detect difference for equal) < alpha.
    second_type_errors : StatErrorType, default: ``(0.2,)``
        II type error bounds
        P (suppose equality for different groups) < beta.
    method : str, default: ``"theory"``
        Supports 2 methods: ``"theory"`` and ``"binary"``
        ``"theory"`` ~ by formula using statsmodels solve_power mechanism
        ``"binary"`` ~ using different types of intervals
    groups_ratio : float, default: ``1.0``
        Ratio between two groups. Used by the ``"theory"`` method only.
    alternative : str, default: ``"two-sided"``
        Alternative hypothesis, can be ``"two-sided"``, ``"greater"``
        or ``"less"``.
        ``"greater"`` - if effect is positive.
        ``"less"`` - if effect is negative.
        Used by the ``"theory"`` method only.
    stabilizing_method : str, default: ``"asin"``
        Effect transformation. Can be ``"asin"`` and ``"norm"``.
        For non-binary metrics: only ``"norm"`` is acceptable.
        For binary metrics: ``"norm"`` and ``"asin"``, but ``"asin"``
        is more robust and accurate.
        Used by the ``"theory"`` method only.
    **kwargs : Dict
        Other keyword arguments. Forwarded only when ``method="binary"``.

    Returns
    -------
    result_table : pd.DataFrame
        Table with results of design.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"theory"`` nor ``"binary"``.
    """
    # Promote single values to one-element lists.
    effects = [effects] if isinstance(effects, (float, int)) else effects
    first_type_errors = [first_type_errors] if isinstance(first_type_errors, float) else first_type_errors
    second_type_errors = [second_type_errors] if isinstance(second_type_errors, float) else second_type_errors

    if method == "binary":
        return bin_pkg.get_table_sample_size_on_effect(
            p_a=prob_a,
            first_errors=first_type_errors,
            second_errors=second_type_errors,
            delta_relative_values=effects,
            **kwargs,
        )
    if method != "theory":
        raise ValueError(f"Choose valid method from {BINARY_DESIGN_METHODS}, got {method}")
    return theory_pkg.get_table_sample_size(
        mean=prob_a,
        std=None,
        effects=effects,
        first_errors=first_type_errors,
        second_errors=second_type_errors,
        target_type="binary",
        groups_ratio=groups_ratio,
        alternative=alternative,
        stabilizing_method=stabilizing_method,
    )
def design_binary_effect(
    prob_a: float,
    sizes: types.SampleSizeType,
    first_type_errors: types.StatErrorType = (0.05,),
    second_type_errors: types.StatErrorType = (0.2,),
    method: str = "theory",
    groups_ratio: float = 1.0,
    alternative: str = "two-sided",
    stabilizing_method: str = "asin",
    as_numeric: bool = False,
    **kwargs,
) -> pd.DataFrame:
    """
    Design effect for binary metrics.

    Parameters
    ----------
    prob_a : float
        Probability of success for the control group.
    sizes : SampleSizeType
        List or single value of group sizes.
        For example: ``100``, ``[100, 200]``.
    first_type_errors : StatErrorType, default: ``(0.05,)``
        I type error bounds
        P (detect difference for equal) < alpha.
    second_type_errors : StatErrorType, default: ``(0.2,)``
        II type error bounds
        P (suppose equality for different groups) < beta.
    method : str, default: ``"theory"``
        Supports 2 methods: ``"theory"`` and ``"binary"``
        ``"theory"`` ~ by formula using statsmodels solve_power mechanism
        ``"binary"`` ~ using different types of intervals
    groups_ratio : float, default: ``1.0``
        Ratio between two groups. Used by the ``"theory"`` method only.
    alternative : str, default: ``"two-sided"``
        Alternative hypothesis, can be ``"two-sided"``, ``"greater"``
        or ``"less"``.
        ``"greater"`` - if effect is positive.
        ``"less"`` - if effect is negative.
        Used by the ``"theory"`` method only.
    stabilizing_method : str, default: ``"asin"``
        Effect transformation. Can be ``"asin"`` and ``"norm"``.
        For non-binary metrics: only ``"norm"`` is acceptable.
        For binary metrics: ``"norm"`` and ``"asin"``, but ``"asin"``
        is more robust and accurate.
        Used by the ``"theory"`` method only.
    as_numeric : bool, default: ``False``
        The result of calculations can be obtained as a percentage string
        either as a number, this parameter could used to toggle.
    **kwargs : Dict
        Other keyword arguments. Forwarded only when ``method="binary"``.

    Returns
    -------
    result_table : pd.DataFrame
        Table with results of design.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"theory"`` nor ``"binary"``.
    """
    # Promote single values to one-element lists.
    sizes = [sizes] if isinstance(sizes, int) else sizes
    first_type_errors = [first_type_errors] if isinstance(first_type_errors, float) else first_type_errors
    second_type_errors = [second_type_errors] if isinstance(second_type_errors, float) else second_type_errors

    if method == "binary":
        return bin_pkg.get_table_effect_on_sample_size(
            p_a=prob_a,
            sample_sizes=sizes,
            first_errors=first_type_errors,
            second_errors=second_type_errors,
            as_numeric=as_numeric,
            **kwargs,
        )
    if method != "theory":
        raise ValueError(f"Choose valid method from {BINARY_DESIGN_METHODS}, got {method}")
    return theory_pkg.get_minimal_effects_table(
        mean=prob_a,
        std=None,
        sample_sizes=sizes,
        first_errors=first_type_errors,
        second_errors=second_type_errors,
        as_numeric=as_numeric,
        target_type="binary",
        groups_ratio=groups_ratio,
        alternative=alternative,
        stabilizing_method=stabilizing_method,
    )
def design_binary_power(
    prob_a: float,
    sizes: types.SampleSizeType,
    effects: types.EffectType,
    first_type_errors: types.StatErrorType = (0.05,),
    method: str = "theory",
    groups_ratio: float = 1.0,
    alternative: str = "two-sided",
    stabilizing_method: str = "asin",
    as_numeric: bool = False,
    **kwargs,
) -> pd.DataFrame:
    """
    Design power for binary metrics.

    Parameters
    ----------
    prob_a : float
        Probability of success for the control group.
    sizes : SampleSizeType
        List or single value of group sizes.
        For example: ``100``, ``[100, 200]``.
    effects : EffectType
        List or single value of relative effects.
        For example: ``1.05``, ``[1.05, 1.2]``.
    first_type_errors : StatErrorType, default: ``(0.05,)``
        I type error bounds
        P (detect difference for equal) < alpha.
    method : str, default: ``"theory"``
        Supports 2 methods: ``"theory"`` and ``"binary"``
        ``"theory"`` ~ by formula using statsmodels solve_power mechanism
        ``"binary"`` ~ using different types of intervals
    groups_ratio : float, default: ``1.0``
        Ratio between two groups. Used by the ``"theory"`` method only.
    alternative : str, default: ``"two-sided"``
        Alternative hypothesis, can be ``"two-sided"``, ``"greater"``
        or ``"less"``.
        ``"greater"`` - if effect is positive.
        ``"less"`` - if effect is negative.
        Used by the ``"theory"`` method only.
    stabilizing_method : str, default: ``"asin"``
        Effect transformation. Can be ``"asin"`` and ``"norm"``.
        For non-binary metrics: only ``"norm"`` is acceptable.
        For binary metrics: ``"norm"`` and ``"asin"``, but ``"asin"``
        is more robust and accurate.
        Used by the ``"theory"`` method only.
    as_numeric : bool, default: ``False``
        The result of calculations can be obtained as a percentage string
        either as a number, this parameter could used to toggle.
    **kwargs : Dict
        Other keyword arguments. Forwarded only when ``method="binary"``.

    Returns
    -------
    result_table : pd.DataFrame
        Table with results of design.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"theory"`` nor ``"binary"``.
    """
    # Promote single values to one-element lists.
    effects = [effects] if isinstance(effects, (int, float)) else effects
    sizes = [sizes] if isinstance(sizes, int) else sizes
    first_type_errors = [first_type_errors] if isinstance(first_type_errors, float) else first_type_errors

    if method == "binary":
        return bin_pkg.get_table_power_on_size_and_delta(
            p_a=prob_a,
            sample_sizes=sizes,
            first_errors=first_type_errors,
            delta_relative_values=effects,
            as_numeric=as_numeric,
            **kwargs,
        )
    if method != "theory":
        raise ValueError(f"Choose valid method from {BINARY_DESIGN_METHODS}, got {method}")
    return theory_pkg.get_power_table(
        mean=prob_a,
        std=None,
        sample_sizes=sizes,
        effects=effects,
        first_errors=first_type_errors,
        as_numeric=as_numeric,
        target_type="binary",
        groups_ratio=groups_ratio,
        alternative=alternative,
        stabilizing_method=stabilizing_method,
    )
[docs]
def design_binary(
    to_design: str,
    prob_a: float,
    sizes: Optional[types.SampleSizeType] = None,
    effects: Optional[types.EffectType] = None,
    first_type_errors: types.StatErrorType = (0.05,),
    second_type_errors: types.StatErrorType = (0.2,),
    method: str = "theory",
    groups_ratio: float = 1.0,
    alternative: str = "two-sided",
    stabilizing_method: str = "asin",
    **kwargs,
) -> pd.DataFrame:
    """
    Design of experiment parameters for binary metrics based
    on a known conversion value.

    Dispatches to ``design_binary_size``, ``design_binary_effect`` or
    ``design_binary_power`` depending on ``to_design``.

    Parameters
    ----------
    to_design : str
        Parameter to design: ``"size"``, ``"effect"`` or ``"power"``.
    prob_a : float
        Probability of success for the control group.
    sizes : SampleSizeType, optional
        List or single value of group sizes.
        For example: ``100``, ``[100, 200]``.
        Passed on when designing ``"effect"`` or ``"power"``.
    effects : EffectType, optional
        List or single value of relative effects.
        For example: ``1.05``, ``[1.05, 1.2]``.
        Passed on when designing ``"size"`` or ``"power"``.
    first_type_errors : StatErrorType, default: ``(0.05,)``
        I type error bounds
        P (detect difference for equal) < alpha.
    second_type_errors : StatErrorType, default: ``(0.2,)``
        II type error bounds
        P (suppose equality for different groups) < beta.
        Not passed on when designing ``"power"``.
    method : str, default: ``"theory"``
        Supports 2 methods: ``"theory"`` and ``"binary"``
        ``"theory"`` ~ by formula using statsmodels solve_power mechanism
        ``"binary"`` ~ using different types of intervals
    groups_ratio : float, default: ``1.0``
        Ratio between two groups.
    alternative : str, default: ``"two-sided"``
        Alternative hypothesis, can be ``"two-sided"``, ``"greater"``
        or ``"less"``.
        ``"greater"`` - if effect is positive.
        ``"less"`` - if effect is negative.
    stabilizing_method : str, default: ``"asin"``
        Effect transformation. Can be ``"asin"`` and ``"norm"``.
        For non-binary metrics: only ``"norm"`` is acceptable.
        For binary metrics: ``"norm"`` and ``"asin"``, but ``"asin"``
        is more robust and accurate.
    **kwargs : Dict
        Other keyword arguments, forwarded to the selected function.

    Returns
    -------
    result_table : pd.DataFrame
        Table with results of design.

    Raises
    ------
    ValueError
        If ``to_design`` is not ``"size"``, ``"effect"`` or ``"power"``.
    """
    if to_design == SIZE:
        return design_binary_size(
            prob_a=prob_a,
            effects=effects,
            first_type_errors=first_type_errors,
            second_type_errors=second_type_errors,
            method=method,
            groups_ratio=groups_ratio,
            alternative=alternative,
            stabilizing_method=stabilizing_method,
            **kwargs,
        )
    if to_design == EFFECT:
        return design_binary_effect(
            prob_a=prob_a,
            sizes=sizes,
            first_type_errors=first_type_errors,
            second_type_errors=second_type_errors,
            method=method,
            groups_ratio=groups_ratio,
            alternative=alternative,
            stabilizing_method=stabilizing_method,
            **kwargs,
        )
    if to_design == POWER:
        # Power is the designed quantity here, so no II type error is passed.
        return design_binary_power(
            prob_a=prob_a,
            sizes=sizes,
            effects=effects,
            first_type_errors=first_type_errors,
            method=method,
            groups_ratio=groups_ratio,
            alternative=alternative,
            stabilizing_method=stabilizing_method,
            **kwargs,
        )
    raise ValueError(f"Only {SIZE}, {EFFECT} and {POWER} parameters of the binary experiment could be designed.")