# dataset operations
# NOTE(review): the reader options below are nested under read_data_options,
# as the description of that key implies - confirm against the consumer's schema.
# Every option is left empty as a placeholder; fill in only the ones you need.

# [str] -> Type of your dataset.
type: csv

# Options you want to supply for reading your data (see the detailed overview
# about this in the next section).
read_data_options:
  # [str] -> Delimiter to use.
  sep:

  # [str] -> Alias for sep.
  delimiter:

  # [int, list of int] -> Row number(s) to use as the column names, and the
  # start of the data.
  header:

  # [list] -> List of column names to use.
  names:

  # [int, str, list of int, list of str, False] -> Column(s) to use as the row
  # labels of the DataFrame.
  index_col:

  # [list, callable] -> Return a subset of the columns.
  usecols:

  # [bool] -> If the parsed data only contains one column then return a Series.
  squeeze:

  # [str] -> Prefix to add to column numbers when no header, e.g. 'X' for
  # X0, X1, ...
  prefix:

  # [bool] -> Duplicate columns will be specified as 'X', 'X.1', ... 'X.N',
  # rather than 'X' ... 'X'. Passing in False will cause data to be overwritten
  # if there are duplicate names in the columns.
  mangle_dupe_cols:

  # [type name, dict mapping column name to type] -> Data type for data or
  # columns.
  dtype:

  # [str] -> Parser engine to use. The C engine is faster while the python
  # engine is currently more feature-complete.
  engine:

  # [dict] -> Dict of functions for converting values in certain columns.
  # Keys can either be integers or column labels.
  converters:

  # [list] -> Values to consider as True.
  true_values:

  # [list] -> Values to consider as False.
  false_values:

  # [bool] -> Skip spaces after delimiter.
  skipinitialspace:

  # [list-like, int] -> Line numbers to skip (0-indexed) or number of lines to
  # skip (int) at the start of the file.
  skiprows:

  # [int] -> Number of lines at bottom of file to skip.
  skipfooter:

  # [int] -> Number of rows of file to read. Useful for reading pieces of
  # large files.
  nrows:

  # [scalar, str, list, dict] -> Additional strings to recognize as NA/NaN.
  na_values:

  # [bool] -> Whether or not to include the default NaN values when parsing
  # the data.
  keep_default_na:

  # [bool] -> Detect missing value markers (empty strings and the value of
  # na_values). In data without any NAs, passing na_filter=False can improve
  # the performance of reading a large file.
  na_filter:

  # [bool] -> Indicate number of NA values placed in non-numeric columns.
  verbose:

  # [bool] -> If True, skip over blank lines rather than interpreting them as
  # NaN values.
  skip_blank_lines:

  # [bool, list of int, list of str, list of lists, dict] -> Try parsing the
  # dates.
  parse_dates:

  # [bool] -> If True and parse_dates is enabled, pandas will attempt to infer
  # the format of the datetime strings in the columns, and if it can be
  # inferred, switch to a faster method of parsing them.
  infer_datetime_format:

  # [bool] -> If True and parse_dates specifies combining multiple columns
  # then keep the original columns.
  keep_date_col:

  # [bool] -> DD/MM format dates, international and European format.
  dayfirst:

  # [bool] -> If True, use a cache of unique, converted dates to apply the
  # datetime conversion.
  cache_dates:

  # [str] -> Thousands separator.
  thousands:

  # [str] -> Character to recognize as decimal point (e.g. use ',' for
  # European data).
  decimal:

  # [str] -> Character to break file into lines.
  lineterminator:

  # [str] -> One-character string used to escape other characters.
  escapechar:

  # [str] -> Indicates remainder of line should not be parsed. If found at the
  # beginning of a line, the line will be ignored altogether. This parameter
  # must be a single character.
  comment:

  # [str] -> Encoding to use for UTF when reading/writing (e.g. 'utf-8').
  encoding:

  # [str, csv.Dialect] -> If provided, this parameter will override values
  # (default or not) for the following parameters: delimiter, doublequote,
  # escapechar, skipinitialspace, quotechar, and quoting.
  dialect:

  # [bool] -> Specifies whether or not whitespace (e.g. ' ' or '\t') will be
  # used as the sep.
  delim_whitespace:

  # [bool] -> Internally process the file in chunks, resulting in lower memory
  # use while parsing, but possibly mixed type inference.
  low_memory:

  # [bool] -> If a filepath is provided for filepath_or_buffer, map the file
  # object directly onto memory and access the data directly from there. Using
  # this option can improve performance because there is no longer any I/O
  # overhead.
  memory_map:
支持所有机器学习 SOTA 模型(甚至包括预览版模型);
支持 yaml 和 json 格式;
支持不同的 sklearn 度量,进行回归、分类和聚类;
支持多输出 / 多目标回归和分类;
如果你对评估结果比较满意,就可以使用这个训练 / 预训练好的模型执行预测。
