YAML Configuration File Format
logs: yes OR no # whether to save logs or not
loading:
folder: "path/to/your/dataset/folder"
name: "name_of_your_data_file"
format: "format_of_your_data_file" # e.g. 'csv'
separator: "columns_separator_in_your_data_file" # e.g. ','
target_var: "name_of_the_target_variable_in_the_data_file"
preprocessing:
name_of_column_1_in_your_data_file:
type: "cont" OR "cat" # either continuous or categorical
cleaning: "cleaning_method_to_apply_to_the_column_1" # e.g. 'remove_col' or 'remove_nans' or 'remove_outliers' or a list of these
replace_nans: "method_to_apply_to_replace_NaN_values_for_column_1" # e.g. 'median' or 'mean' or 'most_frequent' or {'value': VALUE}
scaling: "scaling_method_to_apply_to_the_column_1" # e.g. 'min_max' or 'abs_max' or 'standard' or 'robust' (WARNING: only for continuous variables)
encoding: "encoding_method_to_apply_to_the_column_1" # e.g. 'binary' or 'one_hot' (WARNING: only for categorical variables)
...
name_of_column_N_in_your_data_file:
...
dataset:
split:
stratified: yes OR no # whether to ensure the same proportion of target variable values in the train and test sets
train: INT between 0 and 100 (proportion of the whole dataset for training) # e.g. 80 (percent)
test: INT between 0 and 100 (proportion of the whole dataset for testing) # e.g. 20 (percent)
val: INT between 0 and 100 (proportion of the training dataset for validation) # e.g. 10 (percent)
model:
classification OR regression:
name_of_the_model:
hyperparameter_1_of_the_model: VALUE_1
...
hyperparameter_K_of_the_model: VALUE_K
score: "score_or_list_of_scores_to_compute" # e.g. 'f1' or 'cross_entropy' for classification; 'rmse' or 'mae' for regression; it can be a list
output_folder: "path/to/your/output/folder"
name: "name_of_your_model_to_be_saved"
Back to Home