Test Catalogue 🧪 
This documentation is intended as an exhaustive list of possible tests within Wimsey. Note that the examples given intentionally use all possible keywords for demonstrative purposes. This isn't required: you can give as many or as few keywords as you like, except that column must be given wherever it is required.
mean_should 
Test that column metric is within designated range
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
test :   mean_should 
 
 
{ 
   "test" :   "mean_should" , 
   "column" :   "column_a" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  mean_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ mean_should ( ** keywords )]) 
 
 
 
 
 
min_should 
Test that column metric is within designated range
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
test :   min_should 
 
 
{ 
   "test" :   "min_should" , 
   "column" :   "column_a" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  min_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ min_should ( ** keywords )]) 
 
 
 
 
 
max_should 
Test that column metric is within designated range
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
test :   max_should 
 
 
{ 
   "test" :   "max_should" , 
   "column" :   "column_a" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  max_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ max_should ( ** keywords )]) 
 
 
 
 
 
std_should 
Test that column metric is within designated range
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
test :   std_should 
 
 
{ 
   "test" :   "std_should" , 
   "column" :   "column_a" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  std_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ std_should ( ** keywords )]) 
 
 
 
 
 
count_should 
Test that column metric is within designated range
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
test :   count_should 
 
 
{ 
   "test" :   "count_should" , 
   "column" :   "column_a" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  count_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ count_should ( ** keywords )]) 
 
 
 
 
 
row_count_should 
Test that dataframe row count is within designated range
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
test :   row_count_should 
 
 
{ 
   "test" :   "row_count_should" , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 , 
   "be_exactly" :   300 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  row_count_should 
 
keywords  =  { 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 , 
   "be_exactly" :  300 
} 
 
result  =  test ( df ,  contract = [ row_count_should ( ** keywords )]) 
 
 
 
 
 
average_difference_from_other_column_should 
Test that the average difference between the column and the other column is within designated bounds.
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
other_column :   column_b 
test :   average_difference_from_other_column_should 
 
 
{ 
   "test" :   "average_difference_from_other_column_should" , 
   "column" :   "column_a" , 
   "other_column" :   "column_b" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  average_difference_from_other_column_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "other_column" :  "column_b" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ average_difference_from_other_column_should ( ** keywords )]) 
 
 
 
 
 
average_ratio_to_other_column_should 
Test that the average ratio between the column and the other column is within designated bounds (for instance, a value of 1 has a ratio of 0.1 to a value of 10).
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
other_column :   column_b 
test :   average_ratio_to_other_column_should 
 
 
{ 
   "test" :   "average_ratio_to_other_column_should" , 
   "column" :   "column_a" , 
   "other_column" :   "column_b" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  average_ratio_to_other_column_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "other_column" :  "column_b" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ average_ratio_to_other_column_should ( ** keywords )]) 
 
 
 
 
 
max_string_length_should 
Test that the maximum string length is within expected bounds
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
test :   max_string_length_should 
 
 
{ 
   "test" :   "max_string_length_should" , 
   "column" :   "column_a" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  max_string_length_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ max_string_length_should ( ** keywords )]) 
 
 
 
 
 
all_values_should 
Test that all unique values within a column are within the expected group
yaml json python 
be_one_of : 
-   int64 
-   float64 
column :   column_a 
match_regex :   at$ 
not_be_one_of : 
-   a 
-   b 
test :   all_values_should 
 
 
{ 
   "test" :   "all_values_should" , 
   "column" :   "column_a" , 
   "be_one_of" :   [ 
     "int64" , 
     "float64" 
   ], 
   "not_be_one_of" :   [ 
     "a" , 
     "b" 
   ], 
   "match_regex" :   "at$" 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  all_values_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be_one_of" :  [ 
     "int64" , 
     "float64" 
   ], 
   "not_be_one_of" :  [ 
     "a" , 
     "b" 
   ], 
   "match_regex" :  "at$" 
} 
 
result  =  test ( df ,  contract = [ all_values_should ( ** keywords )]) 
 
 
 
 
 
type_should 
Check that the type of a column meets expected criteria. Note that because Wimsey is a dataframe-agnostic tool, the type should be one of Narwhals' expected types, such as Float64, Int64, String, etc. See Narwhals' documentation for more details: https://narwhals-dev.github.io/narwhals/api-reference/dtypes/
yaml json python 
be :   int64 
be_one_of : 
-   int64 
-   float64 
column :   column_a 
not_be :   string 
test :   type_should 
 
 
{ 
   "test" :   "type_should" , 
   "column" :   "column_a" , 
   "be" :   "int64" , 
   "not_be" :   "string" , 
   "be_one_of" :   [ 
     "int64" , 
     "float64" 
   ] 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  type_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be" :  "int64" , 
   "not_be" :  "string" , 
   "be_one_of" :  [ 
     "int64" , 
     "float64" 
   ] 
} 
 
result  =  test ( df ,  contract = [ type_should ( ** keywords )]) 
 
 
 
 
 
columns_should 
Check that expected columns are present / non-present within dataframe
 
null_count_should 
Check that null count of column meets expected criteria.
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
test :   null_count_should 
 
 
{ 
   "test" :   "null_count_should" , 
   "column" :   "column_a" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  null_count_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ null_count_should ( ** keywords )]) 
 
 
 
 
 
null_percentage_should 
Check that null percentage of column meets expected criteria.
yaml json python 
be_exactly :   300 
be_greater_than :   500 
be_greater_than_or_equal_to :   500 
be_less_than :   500 
be_less_than_or_equal_to :   300 
column :   column_a 
test :   null_percentage_should 
 
 
{ 
   "test" :   "null_percentage_should" , 
   "column" :   "column_a" , 
   "be_exactly" :   300 , 
   "be_less_than" :   500 , 
   "be_less_than_or_equal_to" :   300 , 
   "be_greater_than" :   500 , 
   "be_greater_than_or_equal_to" :   500 
} 
 
 
from  wimsey  import  test 
from  wimsey.tests  import  null_percentage_should 
 
keywords  =  { 
   "column" :  "column_a" , 
   "be_exactly" :  300 , 
   "be_less_than" :  500 , 
   "be_less_than_or_equal_to" :  300 , 
   "be_greater_than" :  500 , 
   "be_greater_than_or_equal_to" :  500 
} 
 
result  =  test ( df ,  contract = [ null_percentage_should ( ** keywords )])