import numpy as np
def split_data(X, Y, proportions, rng=None):
    """Shuffle and split input and output into 3 subsets for an ML model.

    Rows of X and Y are shuffled with the same random permutation, so the
    pairing between an input row and its output row is preserved, and are
    then cut into consecutive train / validation / test chunks.

    Arguments
    =========
    X, Y: ndarrays where rows are observations
        (both arrays must have an identical number of rows).
    proportions: sequence of three floats giving the fraction of the
        original data allocated to the train, validation and test sets,
        respectively. The values should sum to 1.0.
    rng: numpy random generator instance for reproducibility. If None,
        a new generator is created without a fixed seed.

    Returns
    =======
    X_train, X_val, X_test, Y_train, Y_val, Y_test:
        6 ndarrays (3 splits each for input and output). Each keeps the
        column layout of the array it was taken from, and the row counts
        of the three splits add up to the row count of the original.

    Raises
    ======
    AssertionError: if `proportions` does not hold three values, if the
        values do not sum to one, if X and Y differ in number of rows, or
        if the generated splits do not add up to the original length.
    """
    assert len(proportions) == 3, "Three proportions must be provided"
    # Compare with a tolerance: e.g. 0.7 + 0.2 + 0.1 is not exactly 1.0
    # in floating point.
    assert np.isclose(sum(proportions), 1.0), "Sum of proportions should be one"
    assert X.shape[0] == Y.shape[0], "X and Y arrays must have same dimensions"

    # Shuffle data using random permutation of indices
    if rng is None:
        rng = np.random.default_rng()
    n_rows = X.shape[0]
    indices = rng.permutation(n_rows)

    # Round the *cumulative* proportions to get the cut points. Rounding each
    # split size on its own could make the sizes add up to n_rows +/- 1;
    # with cumulative boundaries the test set simply takes the remainder.
    train_end = int(round(proportions[0] * n_rows))
    val_end = int(round((proportions[0] + proportions[1]) * n_rows))

    train_idx = indices[:train_end]
    val_idx = indices[train_end:val_end]
    test_idx = indices[val_end:]

    # Create shuffled training, validation and test sets. The same index
    # arrays are applied to X and Y so rows stay aligned.
    X_train, X_val, X_test = X[train_idx], X[val_idx], X[test_idx]
    Y_train, Y_val, Y_test = Y[train_idx], Y[val_idx], Y[test_idx]

    total_rows = len(X_train) + len(X_val) + len(X_test)
    assert total_rows == n_rows, "Generated datasets don't have same accumulated length as original"
    return X_train, X_val, X_test, Y_train, Y_val, Y_test