I’m facing an error which I can’t understand using Keras for a prediction task.
Here is my code:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt
import finpy_tse as fpy
# Define the stock symbol and time period
stock_symbol="وغدیر"
start_date="1400-01-01"
end_date="1402-01-01"
# Download stock price data from the Tehran Stock Exchange using finpy_tse
stock_data = fpy.Get_Price_History(stock_symbol, start_date, end_date)
# Print the fetched data
print("Fetched data from the site:")
print(stock_data)
data = pd.DataFrame(stock_data)
# Download stock price data from the Tehran Stock Exchange using finpy_tse
stock_data = fpy.Get_Price_History(stock_symbol, start_date, end_date)
data = pd.DataFrame(stock_data)
# Print columns to inspect the structure of the DataFrame
print(data.columns)
# Feature engineering: Creating additional features if needed
# Assuming the 'Close' or 'Final' and 'Volume' columns are present in the DataFrame
if 'Close' in data.columns and 'Volume' in data.columns:
data['DailyReturn'] = data['Close'].pct_change()
data['VolumeChange'] = data['Volume'].pct_change()
# Drop missing values
data = data.dropna()
# Check if the dataset is empty after dropping missing values
if data.empty:
print("Empty dataset after dropping missing values. Unable to proceed.")
else:
# Use multiple features for training
features = ['DailyReturn', 'VolumeChange']
dataset = data[features].values
# Check if the dataset is empty after feature engineering
if dataset.shape[0] == 0:
print("Empty dataset after feature engineering. Unable to proceed.")
else:
# Normalize the data only if the dataset is not empty
scaler = MinMaxScaler(feature_range=(0, 1))
dataset_scaled = scaler.fit_transform(dataset)
# Continue with the rest of your code...
# Create a time series dataset
def create_dataset(dataset, time_steps=1):
X, y = [], []
for i in range(len(dataset) - time_steps):
a = dataset[i:(i + time_steps), :]
X.append(a)
y.append(dataset[i + time_steps, 0]) # Predicting DailyReturn
return np.array(X), np.array(y)
# Define the time steps (you can adjust this parameter)
time_steps = 10
# Create the time series dataset
X, y = create_dataset(dataset_scaled, time_steps)
# Print shape and size for debugging
print("Before reshaping - X shape:", X.shape, "X size:", X.size)
# Reshape data for LSTM model [samples, time steps, features]
X = np.reshape(X, (1, X.shape[0], X.shape[1]))
# Build the LSTM model with dropout for better generalization
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=100, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer="adam", loss="mean_squared_error")
# Train the model
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.1)
# Test the model on new data
test_data = dataset_scaled[-time_steps:]
test_data = np.reshape(test_data, (1, time_steps, len(features)))
predicted_price = model.predict(test_data)
# Inverse transform and reshape the predicted_price
predicted_price = scaler.inverse_transform(np.reshape(predicted_price, (1, time_steps, len(features))))
print("Shapes for debugging:")
print("test_data shape:", test_data.shape)
print("predicted_price shape:", predicted_price.shape)
# Reshape and inverse transform
predicted_price = scaler.inverse_transform(np.reshape(predicted_price, (time_steps, len(features))))
# Assuming you are interested in the first column of the prediction
predicted_price = predicted_price[0, :, 0]
# Visualize the results
plt.plot(data['Close'].values, label="Actual Stock Price")
plt.plot(np.arange(len(data['Close']), len(data['Close']) + time_steps), predicted_price, marker="o", color="red",
label="Predicted Stock Price")
plt.xlabel('Days')
plt.ylabel('Stock Price')
plt.title(f'Stock Price Prediction for {stock_symbol} using LSTM')
plt.legend()
plt.show()
else:
print("The 'Close' or 'Volume' columns are not present in the DataFrame.")
print("Please check your stock price data or provide a DataFrame with these columns.")
and the error is:
Traceback (most recent call last): File
“…:/Users/…../PycharmProjects/untitled1/…..py”, line 76, inX = np.reshape(X, (1, X.shape[0], X.shape[1])) File “<array_function internals>”, line 180, in reshape File
“….:\Users…..\PycharmProjects\untitled1\venv\lib\site-packages\numpy\core\fromnumeric.py”,
line 298, in reshape
return _wrapfunc(a, ‘reshape’, newshape, order=order) File “….:\Users….\PycharmProjects\untitled1\venv\lib\site-packages\numpy\core\fromnumeric.py”,
line 57, in _wrapfunc
return bound(*args, **kwds) ValueError: cannot reshape array of size 9200 into shape (1,460,10)