@@ -128,9 +128,12 @@ def merge_values(values, merge_function):
128128
129129
130130def get_values (values , lhs_name = None , rhs_name = None ):
131- exclude_cols = ["diff" , "t-value" , "p-value" , "significant" ]
132- exclude_cols .extend ([f'std_{ lhs_name } ' , f'std_{ rhs_name } ' ])
133- exclude_cols .extend ([f'cv_{ lhs_name } ' , f'cv_{ rhs_name } ' ])
131+ exclude_cols = [
132+ "diff" , "t-value" , "p-value" , "significant" ,
133+ f'std_{ lhs_name } ' , f'std_{ rhs_name } ' ,
134+ f'cv_{ lhs_name } ' , f'cv_{ rhs_name } ' ,
135+ "diff_ci_rel" , "diff_ci_abs" ,
136+ ]
134137 values = values [[c for c in values .columns if c not in exclude_cols ]]
135138 has_two_runs = len (values .columns ) == 2
136139 if has_two_runs :
@@ -163,7 +166,7 @@ def add_diff_column(metric, values, absolute_diff=False):
163166 return values
164167
165168
166- def compute_statistics (lhs_d , rhs_d , metrics , alpha , coeff_var , lhs_name , rhs_name ):
169+ def compute_statistics (lhs_d , rhs_d , lhs_name , rhs_name , metrics , alpha , coeff_var , diff_conf_int ):
167170 stats_dict = {}
168171
169172 for metric in metrics :
@@ -181,9 +184,9 @@ def compute_statistics(lhs_d, rhs_d, metrics, alpha, coeff_var, lhs_name, rhs_na
181184 if len (lhs_values ) >= 2 and len (rhs_values ) >= 2 :
182185 lhs_std = lhs_values .std (ddof = 1 )
183186 rhs_std = rhs_values .std (ddof = 1 )
187+ lhs_mean = lhs_values .mean ()
188+ rhs_mean = rhs_values .mean ()
184189 if coeff_var :
185- lhs_mean = lhs_values .mean ()
186- rhs_mean = rhs_values .mean ()
187190 stats_dict [metric ][program ] = {
188191 f'cv_{ lhs_name } ' : lhs_std / lhs_mean if lhs_mean != 0 else float ('nan' ),
189192 f'cv_{ rhs_name } ' : rhs_std / rhs_mean if rhs_mean != 0 else float ('nan' ),
@@ -193,10 +196,26 @@ def compute_statistics(lhs_d, rhs_d, metrics, alpha, coeff_var, lhs_name, rhs_na
193196 f'std_{ lhs_name } ' : lhs_std ,
194197 f'std_{ rhs_name } ' : rhs_std ,
195198 }
196- t_stat , p_val = stats .ttest_ind (lhs_values , rhs_values )
197- stats_dict [metric ][program ]['t-value' ] = t_stat
198- stats_dict [metric ][program ]['p-value' ] = p_val
199- stats_dict [metric ][program ]['significant' ] = "Y" if p_val < alpha else "N"
199+ ttest = stats .ttest_ind (lhs_values , rhs_values )
200+ stats_dict [metric ][program ]['t-value' ] = ttest .statistic
201+ stats_dict [metric ][program ]['p-value' ] = ttest .pvalue
202+ stats_dict [metric ][program ]['significant' ] = "Y" if ttest .pvalue < alpha else "N"
203+
204+ if diff_conf_int :
205+ ci = ttest .confidence_interval (1 - alpha )
206+ # CI is for mean(lhs)-mean(rhs); negate for rhs-lhs
207+ abs_lo = - ci .high
208+ abs_hi = - ci .low
209+ if diff_conf_int == "relative" :
210+ if lhs_mean != 0 :
211+ ci_lo = abs_lo / lhs_mean
212+ ci_hi = abs_hi / lhs_mean
213+ else :
214+ ci_lo = float ('nan' )
215+ ci_hi = float ('nan' )
216+ stats_dict [metric ][program ]['diff_ci_rel' ] = (ci_lo , ci_hi )
217+ else :
218+ stats_dict [metric ][program ]['diff_ci_abs' ] = (abs_lo , abs_hi )
200219 else :
201220 if coeff_var :
202221 stats_dict [metric ][program ] = {
@@ -218,6 +237,10 @@ def compute_statistics(lhs_d, rhs_d, metrics, alpha, coeff_var, lhs_name, rhs_na
218237 else :
219238 stat_col_names += [f'std_{ lhs_name } ' , f'std_{ rhs_name } ' ]
220239 stat_col_names += ['t-value' , 'p-value' , 'significant' ]
240+ if diff_conf_int == "relative" :
241+ stat_col_names += ['diff_ci_rel' ]
242+ elif diff_conf_int == "absolute" :
243+ stat_col_names += ['diff_ci_abs' ]
221244
222245 return stats_dict , stat_col_names
223246
@@ -397,6 +420,14 @@ def print_result(
397420 formatters [(m , f'cv_{ lhs_name } ' )] = lambda x : "%4.1f%%" % (x * 100 ) if not pd .isna (x ) else ""
398421 if (m , f'cv_{ rhs_name } ' ) in dataout .columns :
399422 formatters [(m , f'cv_{ rhs_name } ' )] = lambda x : "%4.1f%%" % (x * 100 ) if not pd .isna (x ) else ""
423+ if (m , "diff_ci_rel" ) in dataout .columns :
424+ formatters [(m , "diff_ci_rel" )] = lambda x : \
425+ "[%4.1f%%, %4.1f%%]" % (x [0 ] * 100 , x [1 ] * 100 ) \
426+ if isinstance (x , tuple ) and not (pd .isna (x [0 ]) or pd .isna (x [1 ])) else ""
427+ if (m , "diff_ci_abs" ) in dataout .columns :
428+ formatters [(m , "diff_ci_abs" )] = lambda x : \
429+ "[%4.3f, %4.3f]" % (x [0 ], x [1 ]) \
430+ if isinstance (x , tuple ) and not (pd .isna (x [0 ]) or pd .isna (x [1 ])) else ""
400431 # Turn index into a column so we can format it...
401432 formatted_program = dataout .index .to_series ()
402433 if shorten_names :
@@ -445,9 +476,12 @@ def float_format(x):
445476 formatters = formatters ,
446477 )
447478 print (out )
448- exclude_from_summary = ["t-value" , "p-value" , "significant" ]
449- exclude_from_summary .extend ([f'std_{ lhs_name } ' , f'std_{ rhs_name } ' ])
450- exclude_from_summary .extend ([f'cv_{ lhs_name } ' , f'cv_{ rhs_name } ' ])
479+ exclude_from_summary = [
480+ "t-value" , "p-value" , "significant" ,
481+ f'std_{ lhs_name } ' , f'std_{ rhs_name } ' ,
482+ f'cv_{ lhs_name } ' , f'cv_{ rhs_name } ' ,
483+ 'diff_ci_rel' , 'diff_ci_abs' ,
484+ ]
451485 d_summary = d .drop (columns = exclude_from_summary , level = 1 , errors = 'ignore' )
452486 print (d_summary .describe ())
453487
@@ -564,6 +598,15 @@ def main():
564598 default = False ,
565599 help = "Compute relative coefficient of variation (%%) rather than absolute stddev" ,
566600 )
601+ parser .add_argument (
602+ "--diff-confidence-interval" ,
603+ choices = ["relative" , "absolute" ],
604+ nargs = "?" ,
605+ const = "relative" ,
606+ default = None ,
607+ dest = "diff_confidence_interval" ,
608+ help = "Show confidence interval for the difference (default: relative)" ,
609+ )
567610 config = parser .parse_args ()
568611
569612 if config .show_diff is None :
@@ -604,11 +647,13 @@ def main():
604647 if config .statistics :
605648 metrics_for_stats = config .metrics if len (config .metrics ) > 0 else get_default_metric (lhs_d , rhs_d )
606649 stats_dict , stat_col_names = compute_statistics (
607- lhs_d , rhs_d , metrics_for_stats ,
650+ lhs_d , rhs_d ,
651+ lhs_name = config .lhs_name ,
652+ rhs_name = config .rhs_name ,
653+ metrics = metrics_for_stats ,
608654 alpha = config .alpha ,
609655 coeff_var = config .coefficient_variation ,
610- lhs_name = config .lhs_name ,
611- rhs_name = config .rhs_name
656+ diff_conf_int = config .diff_confidence_interval ,
612657 )
613658
614659 # Merge data
0 commit comments