I am following the process shown on Wine Quality Prediction End-to-End ML Project on Krish Naik’s YouTube channel to do a Flight Fare Prediction Project.
I run this cell of model trainer pipeline on 04_model_trainer.ipynb:
try:
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer_config = ModelTrainer(model_trainer_config)
# model_trainer_config.train()
model_trainer_config.initiate_model_training()
except Exception as e:
raise e
I get this error:
ValueError: at least one array or dtype is required
Here is the full traceback:
[2023-12-19 14:59:14,228: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-12-19 14:59:14,235: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-19 14:59:14,235: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-12-19 14:59:14,235: INFO: common: created directory at: artifacts]
[2023-12-19 14:59:14,235: INFO: common: created directory at: artifacts/model_trainer]
[2023-12-19 14:59:14,467: INFO: 378519018: Splitting ]
[2023-12-19 14:59:14,468: INFO: 378519018: Exception occured during model training]
[2023-12-19 14:59:14,468: INFO: 378519018: Exception occured at model trianing]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[17], line 8
6 model_trainer_config.initiate_model_training()
7 except Exception as e:
----> 8 raise e
Cell In[17], line 6
4 model_trainer_config = ModelTrainer(model_trainer_config)
5 # model_trainer_config.train()
----> 6 model_trainer_config.initiate_model_training()
7 except Exception as e:
8 raise e
Cell In[16], line 130, in ModelTrainer.initiate_model_training(self)
128 except Exception as e:
129 logger.info('Exception occured at model trianing')
--> 130 raise e
Cell In[16], line 104, in ModelTrainer.initiate_model_training(self)
89 logger.info('Splitting ')
91 models={
92 'LinearRegression':LinearRegression(),
93 'Lasso':Lasso(),
(...)
101 "KNN" : KNeighborsRegressor()
102 }
--> 104 model_report:dict = ModelTrainer.evaluate_model(X_train,y_train, X_test, y_test, models)
105 print(model_report)
106 print("\n====================================================================================")
Cell In[16], line 73, in ModelTrainer.evaluate_model(X_train, y_train, X_test, y_test, models)
71 except Exception as e:
72 logger.info('Exception occured during model training')
---> 73 raise e
Cell In[16], line 59, in ModelTrainer.evaluate_model(X_train, y_train, X_test, y_test, models)
56 model = list(models.values())[i]
58 # Train model
---> 59 model.fit(X_train,y_train)
61 # Predict Testing data
62 y_test_pred = model.predict(X_test)
File c:\Users\2021\.conda\envs\flightfareprediction\lib\site-packages\sklearn\base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
1145 estimator._validate_params()
1147 with config_context(
1148 skip_parameter_validation=(
1149 prefer_skip_nested_validation or global_skip_validation
1150 )
1151 ):
-> 1152 return fit_method(estimator, *args, **kwargs)
File c:\Users\2021\.conda\envs\flightfareprediction\lib\site-packages\sklearn\linear_model\_base.py:678, in LinearRegression.fit(self, X, y, sample_weight)
674 n_jobs_ = self.n_jobs
676 accept_sparse = False if self.positive else ["csr", "csc", "coo"]
--> 678 X, y = self._validate_data(
679 X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True
680 )
682 has_sw = sample_weight is not None
683 if has_sw:
File c:\Users\2021\.conda\envs\flightfareprediction\lib\site-packages\sklearn\base.py:622, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)
620 y = check_array(y, input_name="y", **check_y_params)
621 else:
--> 622 X, y = check_X_y(X, y, **check_params)
623 out = X, y
625 if not no_val_X and check_params.get("ensure_2d", True):
File c:\Users\2021\.conda\envs\flightfareprediction\lib\site-packages\sklearn\utils\validation.py:1146, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
1141 estimator_name = _check_estimator_name(estimator)
1142 raise ValueError(
1143 f"{estimator_name} requires y to be passed, but the target y is None"
1144 )
-> 1146 X = check_array(
1147 X,
1148 accept_sparse=accept_sparse,
1149 accept_large_sparse=accept_large_sparse,
1150 dtype=dtype,
1151 order=order,
1152 copy=copy,
1153 force_all_finite=force_all_finite,
1154 ensure_2d=ensure_2d,
1155 allow_nd=allow_nd,
1156 ensure_min_samples=ensure_min_samples,
1157 ensure_min_features=ensure_min_features,
1158 estimator=estimator,
1159 input_name="X",
1160 )
1162 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator)
1164 check_consistent_length(X, y)
File c:\Users\2021\.conda\envs\flightfareprediction\lib\site-packages\sklearn\utils\validation.py:795, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
791 pandas_requires_conversion = any(
792 _pandas_dtype_needs_early_conversion(i) for i in dtypes_orig
793 )
794 if all(isinstance(dtype_iter, np.dtype) for dtype_iter in dtypes_orig):
--> 795 dtype_orig = np.result_type(*dtypes_orig)
796 elif pandas_requires_conversion and any(d == object for d in dtypes_orig):
797 # Force object if any of the dtypes is an object
798 dtype_orig = object
File <__array_function__ internals>:200, in result_type(*args, **kwargs)
ValueError: at least one array or dtype is required
Here is the code of ModelTrainer
class:
class ModelTrainer:
def __init__(self, model_trainer_config):
self.model_trainer_config = model_trainer_config
# def __init__(self):
# self.model_trainer_config = ModelTrainerConfig()
def save_obj(file_path, obj):
try:
dir_path = os.path.dirname(file_path)
os.makedirs(dir_path, exist_ok=True)
with open(file_path, 'wb') as file_obj:
joblib.dump(obj, file_obj, compress= ('gzip'))
except Exception as e:
logger.info('Error occured in utils save_obj')
raise e
def evaluate_model(X_train, y_train, X_test, y_test, models):
try:
report = {}
for i in range(len(models)):
model = list(models.values())[i]
# Train model
model.fit(X_train,y_train)
# Predict Testing data
y_test_pred = model.predict(X_test)
# Get R2 scores for train and test data
test_model_score = r2_score(y_test,y_test_pred)
report[list(models.keys())[i]] = test_model_score
return report
except Exception as e:
logger.info('Exception occured during model training')
raise e
def initiate_model_training(self): # removing the required variables to be passed into the function because those variables are created below (assuming they were correctly generated in train() )
# lines below taken from your commented out train() function
train_data = pd.read_csv(self.model_trainer_config.train_data_path)
test_data = pd.read_csv(self.model_trainer_config.test_data_path)
X_train = train_data.drop([self.model_trainer_config.target_column], axis=1)
X_test = test_data.drop([self.model_trainer_config.target_column], axis=1)
y_train = train_data[[self.model_trainer_config.target_column]]
y_test = test_data[[self.model_trainer_config.target_column]]
# lines above taken from your commented out train() function
try:
logger.info('Splitting ')
models={
'LinearRegression':LinearRegression(),
'Lasso':Lasso(),
'Ridge':Ridge(),
'Elasticnet':ElasticNet(),
'RandomForestRegressor': RandomForestRegressor(),
'GradientBoostRegressor()' : GradientBoostingRegressor(),
"AdaBoost" : AdaBoostRegressor(),
'DecisionTreeRegressor' : DecisionTreeRegressor(),
"SupportVectorRegressor" : SVR(),
"KNN" : KNeighborsRegressor()
}
model_report:dict = ModelTrainer.evaluate_model(X_train,y_train, X_test, y_test, models)
print(model_report)
print("\n====================================================================================")
logger.info(f'Model Report : {model_report}')
# to get best model score from dictionary
best_model_score = max(sorted(model_report.values()))
best_model_name = list(model_report.keys())[
list(model_report.values()).index(best_model_score)
]
best_model = models[best_model_name]
print(f"Best Model Found, Model Name :{best_model_name}, R2-score: {best_model_score}")
print("\n====================================================================================")
logger.info(f"Best Model Found, Model name: {best_model_name}, R2-score: {best_model_score}")
logger.info(f"{best_model.feature_names_in_}")
ModelTrainer.save_obj(
file_path = self.model_trainer_config.trained_model_file_path,
obj = best_model
)
except Exception as e:
logger.info('Exception occured at model trianing')
raise e
Here is the code of ConfigurationManager
class:
class ConfigurationManager:
def __init__(
self,
config_filepath = CONFIG_FILE_PATH,
params_filepath = PARAMS_FILE_PATH,
schema_filepath = SCHEMA_FILE_PATH):
self.config = read_yaml(config_filepath)
self.params = read_yaml(params_filepath)
self.schema = read_yaml(schema_filepath)
create_directories([self.config.artifacts_root])
def get_model_trainer_config(self) -> ModelTrainerConfig:
config = self.config.model_trainer
params = self.params.ElasticNet
schema = self.schema.TARGET_COLUMN
create_directories([config.root_dir])
model_trainer_config = ModelTrainerConfig(
root_dir=config.root_dir,
train_data_path = config.train_data_path,
test_data_path = config.test_data_path,
# # New Line Added
# trained_model_file_path = os.path.join('artifact', 'model'),
# # New Line Ended
model_name = config.model_name,
alpha = params.alpha,
l1_ratio = params.l1_ratio,
target_column = schema.name
)
return model_trainer_config
Here is the code of ModelTrainerConfig
class:
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class ModelTrainerConfig:
root_dir: Path
train_data_path: Path
test_data_path: Path
model_name: str
alpha: float
l1_ratio: float
target_column: int
Here is my file in GitHub.
My file encoding is UTF-8
How to fix this issue?
there are undefined variables in the example code. the question needs sufficient code for a minimal reproducible example
@D.L I have updated the question with sufficient code.
Are X_train and y_train actual values? stackoverflow.com/questions/59403627/….
@AndrewRyan Yes, I think so.