# YAML Configuration File Format

# Template of the configuration file: replace every placeholder value (and
# every `...` elision) with your own settings before use.

# Whether to save logs or not.
logs: yes OR no

# Location and format of the data file to load.
loading:
  folder: "path/to/your/dataset/folder"
  name: "name_of_your_data_file"
  # e.g. 'csv'
  format: "format_of_your_data_file"
  # e.g. ','
  separator: "columns_separator_in_your_data_file"
  target_var: "name_of_the_target_variable_in_the_data_file"

# One entry per column of the data file, keyed by the column name.
preprocessing:
  name_of_column_1_in_your_data_file:
    # Either continuous ("cont") or categorical ("cat").
    type: "cont" OR "cat"
    # e.g. 'remove_col' or 'remove_nans' or 'remove_outliers',
    # or a list of these.
    cleaning: "cleaning_method_to_apply_to_the_column_1"
    # e.g. 'median' or 'mean' or 'most_frequent' or {'value': VALUE}.
    replace_nans: "method_to_apply_to_replace_NaN_values_for_column_1"
    # e.g. 'min_max' or 'abs_max' or 'standard' or 'robust'.
    # WARNING: only for continuous variables.
    scaling: "scaling_method_to_apply_to_the_column_1"
    # e.g. 'binary' or 'one_hot'.
    # WARNING: only for categorical variables.
    encoding: "encoding_method_to_apply_to_the_column_1"
  ...
  name_of_column_N_in_your_data_file:
    ...

dataset:
  # NOTE(review): the nesting of train/test/val under `split` was
  # reconstructed from flattened text -- confirm against the config loader.
  split:
    # Whether to ensure the same proportion of target variable values in
    # the train and test sets.
    stratified: yes OR no
    # e.g. 80 (percent)
    train: INT between 0 and 100 (proportion of the whole dataset for training)
    # e.g. 20 (percent)
    test: INT between 0 and 100 (proportion of the whole dataset for testing)
    # e.g. 10 (percent)
    val: INT between 0 and 100 (proportion of the training dataset for validation)

# NOTE(review): the nesting of score/output_folder/name under `model` was
# reconstructed from flattened text -- confirm against the config loader.
model:
  classification OR regression:
    name_of_the_model:
      hyperparameter_1_of_the_model: VALUE_1
      ...
      hyperparameter_K_of_the_model: VALUE_K
  # e.g. 'f1' or 'cross_entropy' for classification; "rmse" or "mae" for
  # regression. It can be a list.
  score: "score_or_list_of_scores_to_compute"
  output_folder: "path/to/your/output/folder"
  name: "name_of_your_model_to_be_saved"