import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\BTC_GBP Coinbase Pro Historical Data.csv')
print(df.head())
df.dtypes
Date Price Open High Low Vol. Change % 0 03/01/2024 49,286.4 48,428.9 49,877.9 48,087.1 0.29K 1.77% 1 02/29/2024 48,428.9 48,291.2 49,826.0 46,392.2 0.55K 0.34% 2 02/28/2024 48,266.6 44,822.7 50,332.0 44,600.0 0.72K 7.72% 3 02/27/2024 44,808.9 42,720.5 45,120.0 41,954.2 0.36K 4.90% 4 02/26/2024 42,715.9 40,829.1 43,192.9 40,129.8 0.27K 4.62%
Date object Price object Open object High object Low object Vol. object Change % object dtype: object
df.dtypes
df['Price'] = df['Price'].astype(str).str.replace(',', '').astype(float)
df['Open'] = df['Open'].astype(str).str.replace(',', '').astype(float)
df['High'] = df['High'].astype(str).str.replace(',', '').astype(float)
df['Low'] = df['Low'].astype(str).str.replace(',', '').astype(float)
# Replace the percentage sign and convert to float
df['Change %'] = df['Change %'].astype(str).str.replace('%', '').astype(float)
#Change vol - K and M
def convert_volume_to_numeric(vol):
if pd.isna(vol):
return None
vol = str(vol).upper().replace(',', '')
if 'K' in vol:
return float(vol.replace('K', '')) * 1e3
elif 'M' in vol:
return float(vol.replace('M', '')) * 1e6
else:
return float(vol)
df['Vol.'] = df['Vol.'].apply(convert_volume_to_numeric)
# Change the date datatype: Object to Datetime
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')
# Display the cleaned DataFrame
print(df)
df.dtypes
Date Price Open High Low Vol. Change % 0 2024-03-01 49286.4 48428.9 49877.9 48087.1 290.0 1.77 1 2024-02-29 48428.9 48291.2 49826.0 46392.2 550.0 0.34 2 2024-02-28 48266.6 44822.7 50332.0 44600.0 720.0 7.72 3 2024-02-27 44808.9 42720.5 45120.0 41954.2 360.0 4.90 4 2024-02-26 42715.9 40829.1 43192.9 40129.8 270.0 4.62 ... ... ... ... ... ... ... ... 1092 2021-03-05 35229.0 34857.7 35229.0 34106.6 1340.0 1.06 1093 2021-03-04 34858.0 36240.0 36845.8 34836.3 1430.0 -3.91 1094 2021-03-03 36276.8 34767.1 37587.9 34767.1 1260.0 4.32 1095 2021-03-02 34774.1 35656.3 35656.3 34150.0 950.0 -2.47 1096 2021-03-01 35656.3 32380.0 35656.3 32380.0 1360.0 10.15 [1097 rows x 7 columns]
Date datetime64[ns] Price float64 Open float64 High float64 Low float64 Vol. float64 Change % float64 dtype: object
bit_usd = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\BTC_USD Bitfinex Historical Data.csv')
print(bit_usd.head())
bit_usd.dtypes
Date Price Open High Low Vol. Change % 0 03/01/2024 62,346.0 61,148.0 63,000.0 60,783.0 2.51K 1.96% 1 02/29/2024 61,148.0 62,380.0 63,566.0 60,385.0 4.10K -1.97% 2 02/28/2024 62,380.0 56,998.0 63,780.0 56,668.0 6.63K 9.44% 3 02/27/2024 56,998.0 54,417.0 57,487.0 54,388.0 4.50K 4.74% 4 02/26/2024 54,417.0 51,716.0 54,720.0 50,903.0 4.39K 5.22%
Date object Price object Open object High object Low object Vol. object Change % object dtype: object
bit_usd['Price'] = bit_usd['Price'].astype(str).str.replace(',', '').astype(float)
bit_usd['Open'] = bit_usd['Open'].astype(str).str.replace(',', '').astype(float)
bit_usd['High'] = bit_usd['High'].astype(str).str.replace(',', '').astype(float)
bit_usd['Low'] = bit_usd['Low'].astype(str).str.replace(',', '').astype(float)
# Replace the percentage sign and convert to float
bit_usd['Change %'] = bit_usd['Change %'].astype(str).str.replace('%', '').astype(float)
#Change vol - K and M
def convert_volume_to_numeric(vol):
if pd.isna(vol):
return None
vol = str(vol).upper().replace(',', '')
if 'K' in vol:
return float(vol.replace('K', '')) * 1e3
elif 'M' in vol:
return float(vol.replace('M', '')) * 1e6
else:
return float(vol)
bit_usd['Vol.'] = bit_usd['Vol.'].apply(convert_volume_to_numeric)
# Change the date datatype from Object to Datetime
bit_usd['Date'] = pd.to_datetime(bit_usd['Date'], format='%m/%d/%Y')
# Display the cleaned DataFrame
print(bit_usd)
# Display the data types again to confirm changes
print(bit_usd.dtypes)
Date Price Open High Low Vol. Change % 0 2024-03-01 62346.0 61148.0 63000.0 60783.0 2510.0 1.96 1 2024-02-29 61148.0 62380.0 63566.0 60385.0 4100.0 -1.97 2 2024-02-28 62380.0 56998.0 63780.0 56668.0 6630.0 9.44 3 2024-02-27 56998.0 54417.0 57487.0 54388.0 4500.0 4.74 4 2024-02-26 54417.0 51716.0 54720.0 50903.0 4390.0 5.22 ... ... ... ... ... ... ... ... 1092 2021-03-05 48811.0 48368.0 49277.0 46662.0 9420.0 0.92 1093 2021-03-04 48368.0 50425.0 51556.0 48059.0 9560.0 -4.08 1094 2021-03-03 50425.0 48526.0 52018.0 48526.0 8600.0 3.91 1095 2021-03-02 48526.0 49572.0 49572.0 47453.6 8370.0 -2.11 1096 2021-03-01 49572.0 45300.0 49572.0 45300.0 9420.0 9.43 [1097 rows x 7 columns] Date datetime64[ns] Price float64 Open float64 High float64 Low float64 Vol. float64 Change % float64 dtype: object
bit_inr = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\BTC_INR Synthetic Historical Data.csv')
print(bit_inr.head())
bit_inr.dtypes
Date Price Open High Low Vol. Change % 0 03/01/2024 5,170,624 5,071,696 5,227,076 5,036,433 NaN 1.91% 1 02/29/2024 5,073,948 5,176,916 5,279,115 5,017,673 NaN -1.98% 2 02/28/2024 5,176,321 4,731,160 5,296,428 4,699,779 NaN 9.40% 3 02/27/2024 4,731,766 4,513,558 4,768,902 4,512,017 NaN 4.82% 4 02/26/2024 4,514,234 4,287,393 4,548,782 4,221,497 NaN 5.28%
Date object Price object Open object High object Low object Vol. float64 Change % object dtype: object
bit_inr['Price'] = bit_inr['Price'].astype(str).str.replace(',', '').astype(float)
bit_inr['Open'] = bit_inr['Open'].astype(str).str.replace(',', '').astype(float)
bit_inr['High'] = bit_inr['High'].astype(str).str.replace(',', '').astype(float)
bit_inr['Low'] = bit_inr['Low'].astype(str).str.replace(',', '').astype(float)
# Replace the percentage sign and convert to float
bit_inr['Change %'] = bit_inr['Change %'].astype(str).str.replace('%', '').astype(float)
# Change the date datatype from Object to Datetime
bit_inr['Date'] = pd.to_datetime(bit_inr['Date'], format='%m/%d/%Y')
# Display the cleaned DataFrame
print(bit_inr)
# Display the data types again to confirm changes
print(bit_inr.dtypes)
Date Price Open High Low Vol. Change % 0 2024-03-01 5170624.0 5071696.0 5227076.0 5036433.0 NaN 1.91 1 2024-02-29 5073948.0 5176916.0 5279115.0 5017673.0 NaN -1.98 2 2024-02-28 5176321.0 4731160.0 5296428.0 4699779.0 NaN 9.40 3 2024-02-27 4731766.0 4513558.0 4768902.0 4512017.0 NaN 4.82 4 2024-02-26 4514234.0 4287393.0 4548782.0 4221497.0 NaN 5.28 ... ... ... ... ... ... ... ... 1092 2021-03-05 3575587.0 3536499.0 3617104.0 3388905.0 NaN 1.00 1093 2021-03-04 3540226.0 3675759.0 3777031.0 3456393.0 NaN -3.82 1094 2021-03-03 3680660.0 3545733.0 3824793.0 3527003.0 NaN 3.84 1095 2021-03-02 3544437.0 3639517.0 3683463.0 3452170.0 NaN -2.60 1096 2021-03-01 3639207.0 3342246.0 3652659.0 3327820.0 NaN 9.29 [1097 rows x 7 columns] Date datetime64[ns] Price float64 Open float64 High float64 Low float64 Vol. float64 Change % float64 dtype: object
eth_gbp = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\ETH_GBP Coinbase Pro Historical Data.csv')
print(eth_gbp.head())
eth_gbp.dtypes
Date Price Open High Low Vol. Change % 0 03/01/2024 2,712.34 2,645.76 2,730.76 2,643.83 1.84K 2.51% 1 02/29/2024 2,646.04 2,617.37 2,772.57 2,570.71 4.62K 1.11% 2 02/28/2024 2,616.93 2,546.90 2,748.91 2,534.89 4.35K 2.75% 3 02/27/2024 2,546.90 2,490.22 2,581.24 2,409.79 2.89K 2.15% 4 02/26/2024 2,493.37 2,456.94 2,515.03 2,392.68 2.45K 1.48%
Date object Price object Open object High object Low object Vol. object Change % object dtype: object
eth_gbp['Price'] = eth_gbp['Price'].astype(str).str.replace(',', '').astype(float)
eth_gbp['Open'] = eth_gbp['Open'].astype(str).str.replace(',', '').astype(float)
eth_gbp['High'] = eth_gbp['High'].astype(str).str.replace(',', '').astype(float)
eth_gbp['Low'] = eth_gbp['Low'].astype(str).str.replace(',', '').astype(float)
# Replace the percentage sign and convert to float
eth_gbp['Change %'] = eth_gbp['Change %'].astype(str).str.replace('%', '').astype(float)
# Convert 'Vol.' from K and M to numeric
def convert_volume_to_numeric(vol):
if pd.isna(vol):
return None
vol = str(vol).upper().replace(',', '')
if 'K' in vol:
return float(vol.replace('K', '')) * 1e3
elif 'M' in vol:
return float(vol.replace('M', '')) * 1e6
else:
return float(vol)
eth_gbp['Vol.'] = eth_gbp['Vol.'].apply(convert_volume_to_numeric)
# Change the date datatype from Object to Datetime
eth_gbp['Date'] = pd.to_datetime(eth_gbp['Date'], format='%m/%d/%Y')
# Display the cleaned DataFrame
print(eth_gbp)
# Display the data types again to confirm changes
print(eth_gbp.dtypes)
Date Price Open High Low Vol. Change % 0 2024-03-01 2712.34 2645.76 2730.76 2643.83 1840.0 2.51 1 2024-02-29 2646.04 2617.37 2772.57 2570.71 4620.0 1.11 2 2024-02-28 2616.93 2546.90 2748.91 2534.89 4350.0 2.75 3 2024-02-27 2546.90 2490.22 2581.24 2409.79 2890.0 2.15 4 2024-02-26 2493.37 2456.94 2515.03 2392.68 2450.0 1.48 ... ... ... ... ... ... ... ... 1092 2021-03-05 1103.72 1109.56 1109.56 1066.83 16360.0 -0.53 1093 2021-03-04 1109.56 1128.71 1151.74 1109.56 14850.0 -1.70 1094 2021-03-03 1128.71 1067.56 1179.95 1067.56 15660.0 5.81 1095 2021-03-02 1066.76 1129.25 1129.25 1055.80 17420.0 -5.51 1096 2021-03-01 1129.00 1016.71 1129.00 1016.71 18170.0 10.93 [1097 rows x 7 columns] Date datetime64[ns] Price float64 Open float64 High float64 Low float64 Vol. float64 Change % float64 dtype: object
eth_usd = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Ethereum Historical Data.csv')
print(eth_usd.head())
eth_usd.dtypes
Date Price Open High Low Vol. Change % 0 03/01/2024 3,433.51 3,339.25 3,449.56 3,339.24 431.98K 2.82% 1 02/29/2024 3,339.26 3,382.98 3,520.80 3,300.42 802.17K -1.29% 2 02/28/2024 3,383.03 3,242.44 3,482.48 3,183.92 1.01M 4.35% 3 02/27/2024 3,242.14 3,175.94 3,285.01 3,165.91 674.56K 2.08% 4 02/26/2024 3,175.94 3,112.59 3,193.67 3,038.00 582.39K 2.04%
Date object Price object Open object High object Low object Vol. object Change % object dtype: object
eth_usd['Price'] = eth_usd['Price'].astype(str).str.replace(',', '').astype(float)
eth_usd['Open'] = eth_usd['Open'].astype(str).str.replace(',', '').astype(float)
eth_usd['High'] = eth_usd['High'].astype(str).str.replace(',', '').astype(float)
eth_usd['Low'] = eth_usd['Low'].astype(str).str.replace(',', '').astype(float)
# Replace the percentage sign and convert to float
eth_usd['Change %'] = eth_usd['Change %'].astype(str).str.replace('%', '').astype(float)
# Convert 'Vol.' from K and M to numeric
def convert_volume_to_numeric(vol):
if pd.isna(vol):
return None
vol = str(vol).upper().replace(',', '')
if 'K' in vol:
return float(vol.replace('K', '')) * 1e3
elif 'M' in vol:
return float(vol.replace('M', '')) * 1e6
elif 'B' in vol:
return float(vol.replace('B', '')) * 1e9
else:
return float(vol)
eth_usd['Vol.'] = eth_usd['Vol.'].apply(convert_volume_to_numeric)
# Change the date datatype from Object to Datetime
eth_usd['Date'] = pd.to_datetime(eth_usd['Date'], format='%m/%d/%Y')
# Display the cleaned DataFrame
print(eth_usd)
# Display the data types again to confirm changes
print(eth_usd.dtypes)
Date Price Open High Low Vol. Change % 0 2024-03-01 3433.51 3339.25 3449.56 3339.24 431980.0 2.82 1 2024-02-29 3339.26 3382.98 3520.80 3300.42 802170.0 -1.29 2 2024-02-28 3383.03 3242.44 3482.48 3183.92 1010000.0 4.35 3 2024-02-27 3242.14 3175.94 3285.01 3165.91 674560.0 2.08 4 2024-02-26 3175.94 3112.59 3193.67 3038.00 582390.0 2.04 ... ... ... ... ... ... ... ... 1092 2021-03-05 1530.29 1539.73 1548.71 1443.32 1670000.0 -0.54 1093 2021-03-04 1538.61 1567.84 1623.73 1506.86 1700000.0 -2.06 1094 2021-03-03 1571.05 1488.80 1653.04 1477.72 1170000.0 5.54 1095 2021-03-02 1488.62 1570.00 1602.13 1457.22 1740000.0 -5.18 1096 2021-03-01 1570.03 1418.91 1571.15 1411.55 1920000.0 10.66 [1097 rows x 7 columns] Date datetime64[ns] Price float64 Open float64 High float64 Low float64 Vol. float64 Change % float64 dtype: object
eth_inr = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\ETH_INR Synthetic Historical Data.csv')
print(eth_inr.head())
eth_inr.dtypes
Date Price Open High Low Vol. Change % 0 03/01/2024 284,443 277,151 285,858 276,932 NaN 2.69% 1 02/29/2024 276,986 280,510 291,857 273,646 NaN -1.26% 2 02/28/2024 280,525 268,963 288,714 263,953 NaN 4.27% 3 02/27/2024 269,047 263,240 272,341 262,357 NaN 2.24% 4 02/26/2024 263,146 258,124 264,699 251,787 NaN 1.98%
Date object Price object Open object High object Low object Vol. float64 Change % object dtype: object
eth_inr['Price'] = eth_inr['Price'].astype(str).str.replace(',', '').astype(float)
eth_inr['Open'] = eth_inr['Open'].astype(str).str.replace(',', '').astype(float)
eth_inr['High'] = eth_inr['High'].astype(str).str.replace(',', '').astype(float)
eth_inr['Low'] = eth_inr['Low'].astype(str).str.replace(',', '').astype(float)
# Replace the percentage sign and convert to float
eth_inr['Change %'] = eth_inr['Change %'].astype(str).str.replace('%', '').astype(float)
# Change the date datatype from Object to Datetime
eth_inr['Date'] = pd.to_datetime(eth_inr['Date'], format='%m/%d/%Y')
# Display the cleaned DataFrame
print(eth_inr)
# Display the data types again to confirm changes
print(eth_inr.dtypes)
Date Price Open High Low Vol. Change % 0 2024-03-01 284443.0 277151.0 285858.0 276932.0 NaN 2.69 1 2024-02-29 276986.0 280510.0 291857.0 273646.0 NaN -1.26 2 2024-02-28 280525.0 268963.0 288714.0 263953.0 NaN 4.27 3 2024-02-27 269047.0 263240.0 272341.0 262357.0 NaN 2.24 4 2024-02-26 263146.0 258124.0 264699.0 251787.0 NaN 1.98 ... ... ... ... ... ... ... ... 1092 2021-03-05 112137.0 112686.0 113266.0 105354.0 NaN -0.30 1093 2021-03-04 112469.0 114257.0 118471.0 109596.0 NaN -1.93 1094 2021-03-03 114688.0 108800.0 120400.0 108254.0 NaN 5.04 1095 2021-03-02 109183.0 115186.0 117553.0 106746.0 NaN -5.27 1096 2021-03-01 115252.0 105213.0 115301.0 104118.0 NaN 10.03 [1097 rows x 7 columns] Date datetime64[ns] Price float64 Open float64 High float64 Low float64 Vol. float64 Change % float64 dtype: object
# Start by merging the first two
#merged_df = pd.merge(df, bit_usd, on='Date', how='outer')
# Continue merging the rest one by one
#merged_df = pd.merge(merged_df, bit_inr, on='Date', how='outer')
# Once all are merged, save to CSV
#merged_df.to_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_bitcoin_dataset.csv', index=False)
# merged_df1 = pd.merge(eth_usd, eth_gbp, on='Date', how='outer')
# merged_df1 = pd.merge(merged_df1, eth_inr, on='Date', how='outer')
# merged_df1.to_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_ethereum_dataset.csv', index=False)
bitcoin_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_bitcoin_dataset.csv')
rename_dict = {
'Price_x': 'Price_GBP',
'Price_y': 'Price_USD',
'Price': 'Price_INR',
'Open_x': 'Open_GBP',
'High_x': 'High_GBP',
'Low_x': 'Low_GBP',
'Vol._x': 'Vol._GBP',
'Change %_x': 'Change %_GBP',
'Open_y': 'Open_USD',
'High_y': 'High_USD',
'Low_y': 'Low_USD',
'Vol._y': 'Vol._USD',
'Change %_y': 'Change %_USD',
'Open': 'Open_INR',
'High': 'High_INR',
'Low': 'Low_INR',
'Vol.': 'Vol._INR',
'Change %': 'Change %_INR'
}
# Use the rename method to update the column names
bitcoin_df.rename(columns=rename_dict, inplace=True)
print(bitcoin_df.head())
Date Price_GBP Open_GBP High_GBP Low_GBP Vol._GBP Change %_GBP \ 0 2024-03-01 49286.4 48428.9 49877.9 48087.1 290.0 1.77 1 2024-02-29 48428.9 48291.2 49826.0 46392.2 550.0 0.34 2 2024-02-28 48266.6 44822.7 50332.0 44600.0 720.0 7.72 3 2024-02-27 44808.9 42720.5 45120.0 41954.2 360.0 4.90 4 2024-02-26 42715.9 40829.1 43192.9 40129.8 270.0 4.62 Price_USD Open_USD High_USD Low_USD Vol._USD Change %_USD Price_INR \ 0 62346.0 61148.0 63000.0 60783.0 2510.0 1.96 5170624.0 1 61148.0 62380.0 63566.0 60385.0 4100.0 -1.97 5073948.0 2 62380.0 56998.0 63780.0 56668.0 6630.0 9.44 5176321.0 3 56998.0 54417.0 57487.0 54388.0 4500.0 4.74 4731766.0 4 54417.0 51716.0 54720.0 50903.0 4390.0 5.22 4514234.0 Open_INR High_INR Low_INR Vol._INR Change %_INR 0 5071696.0 5227076.0 5036433.0 NaN 1.91 1 5176916.0 5279115.0 5017673.0 NaN -1.98 2 4731160.0 5296428.0 4699779.0 NaN 9.40 3 4513558.0 4768902.0 4512017.0 NaN 4.82 4 4287393.0 4548782.0 4221497.0 NaN 5.28
ethereum_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_ethereum_dataset.csv')
rename_dict1 = {
'Price_x': 'Price_USD',
'Price_y': 'Price_GBP',
'Price': 'Price_INR',
'Open_x': 'Open_USD',
'High_x': 'High_USD',
'Low_x': 'Low_USD',
'Vol._x': 'Vol._USD',
'Change %_x': 'Change %_USD',
'Open_y': 'Open_GBP',
'High_y': 'High_GBP',
'Low_y': 'Low_GBP',
'Vol._y': 'Vol._GBP',
'Change %_y': 'Change %_GBP',
'Open': 'Open_INR',
'High': 'High_INR',
'Low': 'Low_INR',
'Vol.': 'Vol._INR',
'Change %': 'Change %_INR'
}
ethereum_df.rename(columns=rename_dict1, inplace=True)
print(ethereum_df.head())
Date Price_USD Open_USD High_USD Low_USD Vol._USD \ 0 2024-03-01 3433.51 3339.25 3449.56 3339.24 431980.0 1 2024-02-29 3339.26 3382.98 3520.80 3300.42 802170.0 2 2024-02-28 3383.03 3242.44 3482.48 3183.92 1010000.0 3 2024-02-27 3242.14 3175.94 3285.01 3165.91 674560.0 4 2024-02-26 3175.94 3112.59 3193.67 3038.00 582390.0 Change %_USD Price_GBP Open_GBP High_GBP Low_GBP Vol._GBP \ 0 2.82 2712.34 2645.76 2730.76 2643.83 1840.0 1 -1.29 2646.04 2617.37 2772.57 2570.71 4620.0 2 4.35 2616.93 2546.90 2748.91 2534.89 4350.0 3 2.08 2546.90 2490.22 2581.24 2409.79 2890.0 4 2.04 2493.37 2456.94 2515.03 2392.68 2450.0 Change %_GBP Price_INR Open_INR High_INR Low_INR Vol._INR \ 0 2.51 284443.0 277151.0 285858.0 276932.0 NaN 1 1.11 276986.0 280510.0 291857.0 273646.0 NaN 2 2.75 280525.0 268963.0 288714.0 263953.0 NaN 3 2.15 269047.0 263240.0 272341.0 262357.0 NaN 4 1.48 263146.0 258124.0 264699.0 251787.0 NaN Change %_INR 0 2.69 1 -1.26 2 4.27 3 2.24 4 1.98
correlation_matrix = bitcoin_df[['Price_GBP', 'Price_USD', 'Price_INR']].corr()
plt.figure(figsize=(6, 4))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Price Correlation Between Currencies for Bitcoin')
plt.show()
correlation_matrix1 = ethereum_df[['Price_GBP', 'Price_USD', 'Price_INR']].corr()
plt.figure(figsize=(6, 4))
sns.heatmap(correlation_matrix1, annot=True, cmap='coolwarm')
plt.title('Price Correlation Between Currencies for Ethereum')
plt.show()
import plotly.graph_objs as go
btc_trace = go.Scatter(
x=bitcoin_df['Date'],
y=bitcoin_df['Price_USD'],
mode='lines',
name='Bitcoin',
hoverinfo='text',
text=[
f'Date: {date}<br>USD: ${usd}<br>GBP: £{gbp}<br>INR: ₹{inr}'
for date, usd, gbp, inr in zip(bitcoin_df['Date'], bitcoin_df['Price_USD'], bitcoin_df['Price_GBP'], bitcoin_df['Price_INR'])
]
)
eth_trace = go.Scatter(
x=ethereum_df['Date'],
y=ethereum_df['Price_USD'],
mode='lines',
name='Ethereum',
hoverinfo='text',
text=[
f'Date: {date}<br>USD: ${usd}<br>GBP: £{gbp}<br>INR: ₹{inr}'
for date, usd, gbp, inr in zip(ethereum_df['Date'], ethereum_df['Price_USD'], ethereum_df['Price_GBP'], ethereum_df['Price_INR'])
]
)
# Create a figure
fig = go.Figure()
# Add the Bitcoin and Ethereum trace
fig.add_trace(btc_trace)
fig.add_trace(eth_trace)
# Define updatemenu
updatemenu = dict(
buttons=[
dict(label="Bitcoin",
method="update",
args=[{"visible": [True, False, False]},
{"title": "Bitcoin Price Trend"}]),
dict(label="Ethereum",
method="update",
args=[{"visible": [False, True, True]},
{"title": "Ethereum Price Trend"}])
],
direction="down",
pad={"r": 10, "t": 10},
showactive=True,
x=0.97,
xanchor="right",
y=1.15,
yanchor="top"
)
# Update the layout with the new updatemenu
fig.update_layout(updatemenus=[updatemenu])
fig.update_layout(
title_text="Cryptocurrency Price Trend",
xaxis_title='Date',
yaxis_title='Price'
)
# Show the plot
fig.show()
# Convert 'Date' columns to datetime format
ethereum_df['Date'] = pd.to_datetime(ethereum_df['Date'])
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'])
# Merge datasets on 'Date' for a combined visualization
combined_df = pd.merge(ethereum_df[['Date', 'Price_USD', 'Vol._USD']], bitcoin_df[['Date', 'Price_USD', 'Vol._USD']], on='Date', suffixes=('_eth', '_btc'))
fig_log = make_subplots(specs=[[{"secondary_y": True}]])
# Ethereum volume bars
fig_log.add_trace(go.Bar(x=combined_df['Date'], y=combined_df['Vol._USD_eth'], name='Ethereum',
marker_color='green', opacity=0.9, legendgroup='Ethereum', showlegend=True), secondary_y=True)
# Ethereum price line
fig_log.add_trace(go.Scatter(x=combined_df['Date'], y=combined_df['Price_USD_eth'], mode='lines',
line=dict(color='green'), legendgroup='Ethereum', showlegend=False), secondary_y=False)
# Bitcoin volume bars
fig_log.add_trace(go.Bar(x=combined_df['Date'], y=combined_df['Vol._USD_btc'], name='Bitcoin',
marker_color='red', opacity=0.9, legendgroup='Bitcoin', showlegend=True), secondary_y=True)
# Bitcoin price line
fig_log.add_trace(go.Scatter(x=combined_df['Date'], y=combined_df['Price_USD_btc'], mode='lines',
line=dict(color='red'), legendgroup='Bitcoin', showlegend=False), secondary_y=False)
# Add figure title
fig_log.update_layout(title_text="Ethereum and Bitcoin: Price and Trading Volume Over Time (Log Scale for Volume)")
# Set x-axis title
fig_log.update_xaxes(title_text="Date")
# Set y-axes titles
fig_log.update_yaxes(title_text="<b>Price</b> (USD)", secondary_y=False)
fig_log.update_yaxes(title_text="<b>Volume</b> (USD) - Log Scale", secondary_y=True, type="log")
fig_log.show()
btc_candle_fig = go.Figure(data=[go.Candlestick(x=bitcoin_df['Date'],
open=bitcoin_df['Open_USD'], high=bitcoin_df['High_USD'],
low=bitcoin_df['Low_USD'], close=bitcoin_df['Price_USD'])])
btc_candle_fig.update_layout(title='Bitcoin Candlestick Chart in USD', xaxis_title='Date', yaxis_title='Price')
btc_candle_fig.show()
eth_candle_fig = go.Figure(data=[go.Candlestick(x=ethereum_df['Date'],
open=ethereum_df['Open_USD'], high=ethereum_df['High_USD'],
low=ethereum_df['Low_USD'], close=ethereum_df['Price_USD'])])
eth_candle_fig.update_layout(title='Ethereum Candlestick Chart in USD', xaxis_title='Date', yaxis_title='Price')
eth_candle_fig.show()
# Load US economic data
inflation_usa_df = pd.read_excel('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Interest Rate\\InflationRate_USA.xls')
interest_usa_df = pd.read_excel('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Interest Rate\\Interest_USA.xls')
# Displaying the first few rows of each dataset to understand their structure
print("Bitcoin Data:\n", bitcoin_df.head())
print("Ethereum Data:\n", ethereum_df.head())
print("USA Inflation Rate Data:\n", inflation_usa_df.head())
print("USA Interest Rate Data:\n", interest_usa_df.head())
Bitcoin Data: Date Price_GBP Open_GBP High_GBP Low_GBP Vol._GBP Change %_GBP \ 0 2024-03-01 49286.4 48428.9 49877.9 48087.1 290.0 1.77 1 2024-02-29 48428.9 48291.2 49826.0 46392.2 550.0 0.34 2 2024-02-28 48266.6 44822.7 50332.0 44600.0 720.0 7.72 3 2024-02-27 44808.9 42720.5 45120.0 41954.2 360.0 4.90 4 2024-02-26 42715.9 40829.1 43192.9 40129.8 270.0 4.62 Price_USD Open_USD High_USD Low_USD Vol._USD Change %_USD Price_INR \ 0 62346.0 61148.0 63000.0 60783.0 2510.0 1.96 5170624.0 1 61148.0 62380.0 63566.0 60385.0 4100.0 -1.97 5073948.0 2 62380.0 56998.0 63780.0 56668.0 6630.0 9.44 5176321.0 3 56998.0 54417.0 57487.0 54388.0 4500.0 4.74 4731766.0 4 54417.0 51716.0 54720.0 50903.0 4390.0 5.22 4514234.0 Open_INR High_INR Low_INR Vol._INR Change %_INR 0 5071696.0 5227076.0 5036433.0 NaN 1.91 1 5176916.0 5279115.0 5017673.0 NaN -1.98 2 4731160.0 5296428.0 4699779.0 NaN 9.40 3 4513558.0 4768902.0 4512017.0 NaN 4.82 4 4287393.0 4548782.0 4221497.0 NaN 5.28 Ethereum Data: Date Price_USD Open_USD High_USD Low_USD Vol._USD Change %_USD \ 0 2024-03-01 3433.51 3339.25 3449.56 3339.24 431980.0 2.82 1 2024-02-29 3339.26 3382.98 3520.80 3300.42 802170.0 -1.29 2 2024-02-28 3383.03 3242.44 3482.48 3183.92 1010000.0 4.35 3 2024-02-27 3242.14 3175.94 3285.01 3165.91 674560.0 2.08 4 2024-02-26 3175.94 3112.59 3193.67 3038.00 582390.0 2.04 Price_GBP Open_GBP High_GBP Low_GBP Vol._GBP Change %_GBP Price_INR \ 0 2712.34 2645.76 2730.76 2643.83 1840.0 2.51 284443.0 1 2646.04 2617.37 2772.57 2570.71 4620.0 1.11 276986.0 2 2616.93 2546.90 2748.91 2534.89 4350.0 2.75 280525.0 3 2546.90 2490.22 2581.24 2409.79 2890.0 2.15 269047.0 4 2493.37 2456.94 2515.03 2392.68 2450.0 1.48 263146.0 Open_INR High_INR Low_INR Vol._INR Change %_INR 0 277151.0 285858.0 276932.0 NaN 2.69 1 280510.0 291857.0 273646.0 NaN -1.26 2 268963.0 288714.0 263953.0 NaN 4.27 3 263240.0 272341.0 262357.0 NaN 2.24 4 258124.0 264699.0 251787.0 NaN 1.98 USA Inflation Rate Data: Date inflation_rate 0 2021-03-01 2.40 1 2021-03-02 2.43 2 2021-03-03 2.45 3 2021-03-04 2.43 4 2021-03-05 2.43 USA Interest Rate Data: Date interest_rate 0 2021-03-01 0.64 1 2021-03-02 0.60 2 2021-03-03 0.66 3 2021-03-04 0.70 4 2021-03-05 0.72
# Convert date columns to datetime format
inflation_usa_df['Date'] = pd.to_datetime(inflation_usa_df['Date'])
interest_usa_df['Date'] = pd.to_datetime(interest_usa_df['Date'])
# Set dates as indices
inflation_usa_df.set_index('Date', inplace=True)
interest_usa_df.set_index('Date', inplace=True)
# Merge US economic data into a single dataframe
usa_economic_data = pd.merge(inflation_usa_df, interest_usa_df, left_index=True, right_index=True)
# Convert cryptocurrency data dates to datetime format
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'])
ethereum_df['Date'] = pd.to_datetime(ethereum_df['Date'])
# Set cryptocurrency data dates as indices
bitcoin_df.set_index('Date', inplace=True)
ethereum_df.set_index('Date', inplace=True)
# Display combined USA economic data
print("Combined USA Economic Data:\n", usa_economic_data.head())
Combined USA Economic Data: inflation_rate interest_rate Date 2021-03-01 2.40 0.64 2021-03-02 2.43 0.60 2021-03-03 2.45 0.66 2021-03-04 2.43 0.70 2021-03-05 2.43 0.72
usa_economic_data
inflation_rate | interest_rate | |
---|---|---|
Date | ||
2021-03-01 | 2.40 | 0.64 |
2021-03-02 | 2.43 | 0.60 |
2021-03-03 | 2.45 | 0.66 |
2021-03-04 | 2.43 | 0.70 |
2021-03-05 | 2.43 | 0.72 |
... | ... | ... |
2024-02-26 | 2.36 | -1.04 |
2024-02-27 | 2.38 | -1.01 |
2024-02-28 | 2.38 | -1.07 |
2024-02-29 | 2.40 | -1.07 |
2024-03-01 | 2.40 | -1.16 |
785 rows × 2 columns
# Calculate daily returns
bitcoin_df['BTC_Returns_USD'] = bitcoin_df['Price_USD'].pct_change()
ethereum_df['ETH_Returns_USD'] = ethereum_df['Price_USD'].pct_change()
# 100-day rolling volatility
bitcoin_df['BTC_Volatility_USD'] = bitcoin_df['BTC_Returns_USD'].rolling(window=100).std()
ethereum_df['ETH_Volatility_USD'] = ethereum_df['ETH_Returns_USD'].rolling(window=100).std()
# Display the cryptocurrency data with added volatility
print("Bitcoin Data with Volatility:\n", bitcoin_df.head())
print("Ethereum Data with Volatility:\n", ethereum_df.head())
Bitcoin Data with Volatility: Price_GBP Open_GBP High_GBP Low_GBP Vol._GBP Change %_GBP \ Date 2024-03-01 49286.4 48428.9 49877.9 48087.1 290.0 1.77 2024-02-29 48428.9 48291.2 49826.0 46392.2 550.0 0.34 2024-02-28 48266.6 44822.7 50332.0 44600.0 720.0 7.72 2024-02-27 44808.9 42720.5 45120.0 41954.2 360.0 4.90 2024-02-26 42715.9 40829.1 43192.9 40129.8 270.0 4.62 Price_USD Open_USD High_USD Low_USD Vol._USD Change %_USD \ Date 2024-03-01 62346.0 61148.0 63000.0 60783.0 2510.0 1.96 2024-02-29 61148.0 62380.0 63566.0 60385.0 4100.0 -1.97 2024-02-28 62380.0 56998.0 63780.0 56668.0 6630.0 9.44 2024-02-27 56998.0 54417.0 57487.0 54388.0 4500.0 4.74 2024-02-26 54417.0 51716.0 54720.0 50903.0 4390.0 5.22 Price_INR Open_INR High_INR Low_INR Vol._INR \ Date 2024-03-01 5170624.0 5071696.0 5227076.0 5036433.0 NaN 2024-02-29 5073948.0 5176916.0 5279115.0 5017673.0 NaN 2024-02-28 5176321.0 4731160.0 5296428.0 4699779.0 NaN 2024-02-27 4731766.0 4513558.0 4768902.0 4512017.0 NaN 2024-02-26 4514234.0 4287393.0 4548782.0 4221497.0 NaN Change %_INR BTC_Returns_USD BTC_Volatility_USD Date 2024-03-01 1.91 NaN NaN 2024-02-29 -1.98 -0.019215 NaN 2024-02-28 9.40 0.020148 NaN 2024-02-27 4.82 -0.086278 NaN 2024-02-26 5.28 -0.045282 NaN Ethereum Data with Volatility: Price_USD Open_USD High_USD Low_USD Vol._USD Change %_USD \ Date 2024-03-01 3433.51 3339.25 3449.56 3339.24 431980.0 2.82 2024-02-29 3339.26 3382.98 3520.80 3300.42 802170.0 -1.29 2024-02-28 3383.03 3242.44 3482.48 3183.92 1010000.0 4.35 2024-02-27 3242.14 3175.94 3285.01 3165.91 674560.0 2.08 2024-02-26 3175.94 3112.59 3193.67 3038.00 582390.0 2.04 Price_GBP Open_GBP High_GBP Low_GBP Vol._GBP Change %_GBP \ Date 2024-03-01 2712.34 2645.76 2730.76 2643.83 1840.0 2.51 2024-02-29 2646.04 2617.37 2772.57 2570.71 4620.0 1.11 2024-02-28 2616.93 2546.90 2748.91 2534.89 4350.0 2.75 2024-02-27 2546.90 2490.22 2581.24 2409.79 2890.0 2.15 2024-02-26 2493.37 2456.94 2515.03 2392.68 2450.0 1.48 Price_INR Open_INR High_INR Low_INR Vol._INR Change %_INR \ Date 2024-03-01 284443.0 277151.0 285858.0 276932.0 NaN 2.69 2024-02-29 276986.0 280510.0 291857.0 273646.0 NaN -1.26 2024-02-28 280525.0 268963.0 288714.0 263953.0 NaN 4.27 2024-02-27 269047.0 263240.0 272341.0 262357.0 NaN 2.24 2024-02-26 263146.0 258124.0 264699.0 251787.0 NaN 1.98 ETH_Returns_USD ETH_Volatility_USD Date 2024-03-01 NaN NaN 2024-02-29 -0.027450 NaN 2024-02-28 0.013108 NaN 2024-02-27 -0.041646 NaN 2024-02-26 -0.020419 NaN
# Merge Bitcoin and Ethereum data with US economic data
btc_usa_data = pd.merge(bitcoin_df, usa_economic_data, left_index=True, right_index=True)
eth_usa_data = pd.merge(ethereum_df, usa_economic_data, left_index=True, right_index=True)
# Display the merged datasets
print("Bitcoin & US Data:\n", btc_usa_data.head())
print("Ethereum & US Data:\n", eth_usa_data.head())
Bitcoin & US Data: Price_GBP Open_GBP High_GBP Low_GBP Vol._GBP Change %_GBP \ Date 2024-03-01 49286.4 48428.9 49877.9 48087.1 290.0 1.77 2024-02-29 48428.9 48291.2 49826.0 46392.2 550.0 0.34 2024-02-28 48266.6 44822.7 50332.0 44600.0 720.0 7.72 2024-02-27 44808.9 42720.5 45120.0 41954.2 360.0 4.90 2024-02-26 42715.9 40829.1 43192.9 40129.8 270.0 4.62 Price_USD Open_USD High_USD Low_USD ... Price_INR Open_INR \ Date ... 2024-03-01 62346.0 61148.0 63000.0 60783.0 ... 5170624.0 5071696.0 2024-02-29 61148.0 62380.0 63566.0 60385.0 ... 5073948.0 5176916.0 2024-02-28 62380.0 56998.0 63780.0 56668.0 ... 5176321.0 4731160.0 2024-02-27 56998.0 54417.0 57487.0 54388.0 ... 4731766.0 4513558.0 2024-02-26 54417.0 51716.0 54720.0 50903.0 ... 4514234.0 4287393.0 High_INR Low_INR Vol._INR Change %_INR BTC_Returns_USD \ Date 2024-03-01 5227076.0 5036433.0 NaN 1.91 NaN 2024-02-29 5279115.0 5017673.0 NaN -1.98 -0.019215 2024-02-28 5296428.0 4699779.0 NaN 9.40 0.020148 2024-02-27 4768902.0 4512017.0 NaN 4.82 -0.086278 2024-02-26 4548782.0 4221497.0 NaN 5.28 -0.045282 BTC_Volatility_USD inflation_rate interest_rate Date 2024-03-01 NaN 2.40 -1.16 2024-02-29 NaN 2.40 -1.07 2024-02-28 NaN 2.38 -1.07 2024-02-27 NaN 2.38 -1.01 2024-02-26 NaN 2.36 -1.04 [5 rows x 22 columns] Ethereum & US Data: Price_USD Open_USD High_USD Low_USD Vol._USD Change %_USD \ Date 2024-03-01 3433.51 3339.25 3449.56 3339.24 431980.0 2.82 2024-02-29 3339.26 3382.98 3520.80 3300.42 802170.0 -1.29 2024-02-28 3383.03 3242.44 3482.48 3183.92 1010000.0 4.35 2024-02-27 3242.14 3175.94 3285.01 3165.91 674560.0 2.08 2024-02-26 3175.94 3112.59 3193.67 3038.00 582390.0 2.04 Price_GBP Open_GBP High_GBP Low_GBP ... Price_INR Open_INR \ Date ... 2024-03-01 2712.34 2645.76 2730.76 2643.83 ... 284443.0 277151.0 2024-02-29 2646.04 2617.37 2772.57 2570.71 ... 276986.0 280510.0 2024-02-28 2616.93 2546.90 2748.91 2534.89 ... 280525.0 268963.0 2024-02-27 2546.90 2490.22 2581.24 2409.79 ... 269047.0 263240.0 2024-02-26 2493.37 2456.94 2515.03 2392.68 ... 263146.0 258124.0 High_INR Low_INR Vol._INR Change %_INR ETH_Returns_USD \ Date 2024-03-01 285858.0 276932.0 NaN 2.69 NaN 2024-02-29 291857.0 273646.0 NaN -1.26 -0.027450 2024-02-28 288714.0 263953.0 NaN 4.27 0.013108 2024-02-27 272341.0 262357.0 NaN 2.24 -0.041646 2024-02-26 264699.0 251787.0 NaN 1.98 -0.020419 ETH_Volatility_USD inflation_rate interest_rate Date 2024-03-01 NaN 2.40 -1.16 2024-02-29 NaN 2.40 -1.07 2024-02-28 NaN 2.38 -1.07 2024-02-27 NaN 2.38 -1.01 2024-02-26 NaN 2.36 -1.04 [5 rows x 22 columns]
print(btc_usa_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_USD']].isnull().sum())
print(eth_usa_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_USD']].isnull().sum())
interest_rate 31 inflation_rate 31 BTC_Volatility_USD 72 dtype: int64 interest_rate 31 inflation_rate 31 ETH_Volatility_USD 72 dtype: int64
btc_usa_data = btc_usa_data.dropna(subset=['interest_rate', 'inflation_rate', 'BTC_Volatility_USD'])
eth_usa_data = eth_usa_data.dropna(subset=['interest_rate', 'inflation_rate', 'ETH_Volatility_USD'])
print(btc_usa_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_USD']].isnull().sum())
print(eth_usa_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_USD']].isnull().sum())
interest_rate 0 inflation_rate 0 BTC_Volatility_USD 0 dtype: int64 interest_rate 0 inflation_rate 0 ETH_Volatility_USD 0 dtype: int64
print(btc_usa_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_USD']].describe())
print(eth_usa_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_USD']].describe())
interest_rate inflation_rate BTC_Volatility_USD count 687.000000 687.000000 687.000000 mean 0.433857 2.556696 0.031123 std 1.110229 0.325399 0.008458 min -1.790000 2.070000 0.015961 25% -0.705000 2.290000 0.024272 50% 0.740000 2.510000 0.032284 75% 1.170000 2.720000 0.036521 max 2.780000 3.590000 0.049140 interest_rate inflation_rate ETH_Volatility_USD count 687.000000 687.000000 687.000000 mean 0.433857 2.556696 0.039776 std 1.110229 0.325399 0.014885 min -1.790000 2.070000 0.016178 25% -0.705000 2.290000 0.026996 50% 0.740000 2.510000 0.041584 75% 1.170000 2.720000 0.047648 max 2.780000 3.590000 0.074506
btc_usa_data['BTC_Volatility_USD'] = btc_usa_data['BTC_Volatility_USD'].clip(upper=1) # Adjust limit as needed
eth_usa_data['ETH_Volatility_USD'] = eth_usa_data['ETH_Volatility_USD'].clip(upper=1) # Adjust limit as needed
# Correlation Matrix
btc_corr_usa = btc_usa_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_USD']].corr()
# Regression Analysis
X_btc = btc_usa_data[['interest_rate', 'inflation_rate']]
y_btc = btc_usa_data['BTC_Volatility_USD']
reg_btc = LinearRegression().fit(X_btc, y_btc)
btc_pred = reg_btc.predict(X_btc)
btc_r2 = r2_score(y_btc, btc_pred)
# Output the correlation matrix and R2 score
print("Bitcoin Correlation Matrix:\n", btc_corr_usa)
print(f"Bitcoin R2: {btc_r2}")
Bitcoin Correlation Matrix: interest_rate inflation_rate BTC_Volatility_USD interest_rate 1.000000 0.848611 0.757436 inflation_rate 0.848611 1.000000 0.621265 BTC_Volatility_USD 0.757436 0.621265 1.000000 Bitcoin R2: 0.5753622028334557
# Correlation Matrix for Ethereum
eth_corr_usa = eth_usa_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_USD']].corr()
# Regression Analysis for Ethereum
X_eth = eth_usa_data[['interest_rate', 'inflation_rate']]
y_eth = eth_usa_data['ETH_Volatility_USD']
# Create a linear regression model
reg_eth = LinearRegression().fit(X_eth, y_eth)
# Make predictions using the regression model
eth_pred = reg_eth.predict(X_eth)
# Calculate the R2 score to measure the accuracy of the model
eth_r2 = r2_score(y_eth, eth_pred)
# Output the correlation matrix and R2 score
print("Ethereum Correlation Matrix:\n", eth_corr_usa)
print(f"Ethereum R2: {eth_r2}")
Ethereum Correlation Matrix: interest_rate inflation_rate ETH_Volatility_USD interest_rate 1.000000 0.848611 0.745792 inflation_rate 0.848611 1.000000 0.566951 ETH_Volatility_USD 0.745792 0.566951 1.000000 Ethereum R2: 0.5717404048555825
# Visualization function for correlation matrix
def visualize_correlation_matrix(corr_matrix, title):
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title(title)
plt.show()
# Visualizing the correlation matrices
visualize_correlation_matrix(btc_corr_usa, "Bitcoin Correlation Matrix (USD)")
visualize_correlation_matrix(eth_corr_usa, "Ethereum Correlation Matrix (USD)")
date_range = pd.date_range(start='2021-03-01', end='2024-03-01', freq='D')
# Add date column to the cryptocurrency datasets
bitcoin_df['Date'] = date_range[:len(bitcoin_df)]
ethereum_df['Date'] = date_range[:len(ethereum_df)]
# Convert to datetime and set as indices
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'])
ethereum_df['Date'] = pd.to_datetime(ethereum_df['Date'])
bitcoin_df.set_index('Date', inplace=True)
ethereum_df.set_index('Date', inplace=True)
# Load UK economic data
inflation_uk_df = pd.read_excel('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Interest Rate\\InflationRate_UK.xls')
interest_uk_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Interest Rate\\Interest_UK.csv')
# Convert date columns to datetime format
inflation_uk_df['Date'] = pd.to_datetime(inflation_uk_df['Date'])
interest_uk_df['Date'] = pd.to_datetime(interest_uk_df['Date'])
# Set dates as indices
inflation_uk_df.set_index('Date', inplace=True)
interest_uk_df.set_index('Date', inplace=True)
# Combine UK economic data into a single dataframe
uk_economic_data = pd.merge(inflation_uk_df, interest_uk_df, left_index=True, right_index=True)
# Display the combined UK economic data
print("UK Economic Data:\n", uk_economic_data.head())
UK Economic Data: inflation_rate interest_rate Date 2021-03-01 1.0 0.1 2021-03-02 1.0 0.1 2021-03-03 1.0 0.1 2021-03-04 1.0 0.1 2021-03-05 1.0 0.1
uk_economic_data
inflation_rate | interest_rate | |
---|---|---|
Date | ||
2021-03-01 | 1.0 | 0.10 |
2021-03-02 | 1.0 | 0.10 |
2021-03-03 | 1.0 | 0.10 |
2021-03-04 | 1.0 | 0.10 |
2021-03-05 | 1.0 | 0.10 |
... | ... | ... |
2024-02-26 | 3.8 | 5.25 |
2024-02-27 | 3.8 | 5.25 |
2024-02-28 | 3.8 | 5.25 |
2024-02-29 | 3.8 | 5.25 |
2024-03-01 | 3.8 | 5.25 |
785 rows × 2 columns
# Calculate daily returns
bitcoin_df['BTC_Returns_GBP'] = bitcoin_df['Price_GBP'].pct_change()
ethereum_df['ETH_Returns_GBP'] = ethereum_df['Price_GBP'].pct_change()
# 100-day rolling volatility
bitcoin_df['BTC_Volatility_GBP'] = bitcoin_df['BTC_Returns_GBP'].rolling(window=100).std()
ethereum_df['ETH_Volatility_GBP'] = ethereum_df['ETH_Returns_GBP'].rolling(window=100).std()
# Display the cryptocurrency data with added volatility
print("Bitcoin Data with Volatility:\n", bitcoin_df.head())
print("Ethereum Data with Volatility:\n", ethereum_df.head())
Bitcoin Data with Volatility: Price_GBP Open_GBP High_GBP Low_GBP Vol._GBP Change %_GBP \ Date 2021-03-01 49286.4 48428.9 49877.9 48087.1 290.0 1.77 2021-03-02 48428.9 48291.2 49826.0 46392.2 550.0 0.34 2021-03-03 48266.6 44822.7 50332.0 44600.0 720.0 7.72 2021-03-04 44808.9 42720.5 45120.0 41954.2 360.0 4.90 2021-03-05 42715.9 40829.1 43192.9 40129.8 270.0 4.62 Price_USD Open_USD High_USD Low_USD ... Price_INR Open_INR \ Date ... 2021-03-01 62346.0 61148.0 63000.0 60783.0 ... 5170624.0 5071696.0 2021-03-02 61148.0 62380.0 63566.0 60385.0 ... 5073948.0 5176916.0 2021-03-03 62380.0 56998.0 63780.0 56668.0 ... 5176321.0 4731160.0 2021-03-04 56998.0 54417.0 57487.0 54388.0 ... 4731766.0 4513558.0 2021-03-05 54417.0 51716.0 54720.0 50903.0 ... 4514234.0 4287393.0 High_INR Low_INR Vol._INR Change %_INR BTC_Returns_USD \ Date 2021-03-01 5227076.0 5036433.0 NaN 1.91 NaN 2021-03-02 5279115.0 5017673.0 NaN -1.98 -0.019215 2021-03-03 5296428.0 4699779.0 NaN 9.40 0.020148 2021-03-04 4768902.0 4512017.0 NaN 4.82 -0.086278 2021-03-05 4548782.0 4221497.0 NaN 5.28 -0.045282 BTC_Volatility_USD BTC_Returns_GBP BTC_Volatility_GBP Date 2021-03-01 NaN NaN NaN 2021-03-02 NaN -0.017398 NaN 2021-03-03 NaN -0.003351 NaN 2021-03-04 NaN -0.071638 NaN 2021-03-05 NaN -0.046709 NaN [5 rows x 22 columns] Ethereum Data with Volatility: Price_USD Open_USD High_USD Low_USD Vol._USD Change %_USD \ Date 2021-03-01 3433.51 3339.25 3449.56 3339.24 431980.0 2.82 2021-03-02 3339.26 3382.98 3520.80 3300.42 802170.0 -1.29 2021-03-03 3383.03 3242.44 3482.48 3183.92 1010000.0 4.35 2021-03-04 3242.14 3175.94 3285.01 3165.91 674560.0 2.08 2021-03-05 3175.94 3112.59 3193.67 3038.00 582390.0 2.04 Price_GBP Open_GBP High_GBP Low_GBP ... Price_INR Open_INR \ Date ... 2021-03-01 2712.34 2645.76 2730.76 2643.83 ... 284443.0 277151.0 2021-03-02 2646.04 2617.37 2772.57 2570.71 ... 276986.0 280510.0 2021-03-03 2616.93 2546.90 2748.91 2534.89 ... 280525.0 268963.0 2021-03-04 2546.90 2490.22 2581.24 2409.79 ... 269047.0 263240.0 2021-03-05 2493.37 2456.94 2515.03 2392.68 ... 263146.0 258124.0 High_INR Low_INR Vol._INR Change %_INR ETH_Returns_USD \ Date 2021-03-01 285858.0 276932.0 NaN 2.69 NaN 2021-03-02 291857.0 273646.0 NaN -1.26 -0.027450 2021-03-03 288714.0 263953.0 NaN 4.27 0.013108 2021-03-04 272341.0 262357.0 NaN 2.24 -0.041646 2021-03-05 264699.0 251787.0 NaN 1.98 -0.020419 ETH_Volatility_USD ETH_Returns_GBP ETH_Volatility_GBP Date 2021-03-01 NaN NaN NaN 2021-03-02 NaN -0.024444 NaN 2021-03-03 NaN -0.011001 NaN 2021-03-04 NaN -0.026760 NaN 2021-03-05 NaN -0.021018 NaN [5 rows x 22 columns]
# Merge Bitcoin and Ethereum data with US economic data
btc_uk_data = pd.merge(bitcoin_df, uk_economic_data, left_index=True, right_index=True)
eth_uk_data = pd.merge(ethereum_df, uk_economic_data, left_index=True, right_index=True)
# Display the merged datasets
print("Bitcoin & UK Data:\n", btc_uk_data.head())
print("Ethereum & UK Data:\n", eth_uk_data.head())
Bitcoin & UK Data: Price_GBP Open_GBP High_GBP Low_GBP Vol._GBP Change %_GBP \ Date 2021-03-01 49286.4 48428.9 49877.9 48087.1 290.0 1.77 2021-03-02 48428.9 48291.2 49826.0 46392.2 550.0 0.34 2021-03-03 48266.6 44822.7 50332.0 44600.0 720.0 7.72 2021-03-04 44808.9 42720.5 45120.0 41954.2 360.0 4.90 2021-03-05 42715.9 40829.1 43192.9 40129.8 270.0 4.62 Price_USD Open_USD High_USD Low_USD ... High_INR Low_INR \ Date ... 2021-03-01 62346.0 61148.0 63000.0 60783.0 ... 5227076.0 5036433.0 2021-03-02 61148.0 62380.0 63566.0 60385.0 ... 5279115.0 5017673.0 2021-03-03 62380.0 56998.0 63780.0 56668.0 ... 5296428.0 4699779.0 2021-03-04 56998.0 54417.0 57487.0 54388.0 ... 4768902.0 4512017.0 2021-03-05 54417.0 51716.0 54720.0 50903.0 ... 4548782.0 4221497.0 Vol._INR Change %_INR BTC_Returns_USD BTC_Volatility_USD \ Date 2021-03-01 NaN 1.91 NaN NaN 2021-03-02 NaN -1.98 -0.019215 NaN 2021-03-03 NaN 9.40 0.020148 NaN 2021-03-04 NaN 4.82 -0.086278 NaN 2021-03-05 NaN 5.28 -0.045282 NaN BTC_Returns_GBP BTC_Volatility_GBP inflation_rate interest_rate Date 2021-03-01 NaN NaN 1.0 0.1 2021-03-02 -0.017398 NaN 1.0 0.1 2021-03-03 -0.003351 NaN 1.0 0.1 2021-03-04 -0.071638 NaN 1.0 0.1 2021-03-05 -0.046709 NaN 1.0 0.1 [5 rows x 24 columns] Ethereum & UK Data: Price_USD Open_USD High_USD Low_USD Vol._USD Change %_USD \ Date 2021-03-01 3433.51 3339.25 3449.56 3339.24 431980.0 2.82 2021-03-02 3339.26 3382.98 3520.80 3300.42 802170.0 -1.29 2021-03-03 3383.03 3242.44 3482.48 3183.92 1010000.0 4.35 2021-03-04 3242.14 3175.94 3285.01 3165.91 674560.0 2.08 2021-03-05 3175.94 3112.59 3193.67 3038.00 582390.0 2.04 Price_GBP Open_GBP High_GBP Low_GBP ... High_INR Low_INR \ Date ... 2021-03-01 2712.34 2645.76 2730.76 2643.83 ... 285858.0 276932.0 2021-03-02 2646.04 2617.37 2772.57 2570.71 ... 291857.0 273646.0 2021-03-03 2616.93 2546.90 2748.91 2534.89 ... 288714.0 263953.0 2021-03-04 2546.90 2490.22 2581.24 2409.79 ... 272341.0 262357.0 2021-03-05 2493.37 2456.94 2515.03 2392.68 ... 264699.0 251787.0 Vol._INR Change %_INR ETH_Returns_USD ETH_Volatility_USD \ Date 2021-03-01 NaN 2.69 NaN NaN 2021-03-02 NaN -1.26 -0.027450 NaN 2021-03-03 NaN 4.27 0.013108 NaN 2021-03-04 NaN 2.24 -0.041646 NaN 2021-03-05 NaN 1.98 -0.020419 NaN ETH_Returns_GBP ETH_Volatility_GBP inflation_rate interest_rate Date 2021-03-01 NaN NaN 1.0 0.1 2021-03-02 -0.024444 NaN 1.0 0.1 2021-03-03 -0.011001 NaN 1.0 0.1 2021-03-04 -0.026760 NaN 1.0 0.1 2021-03-05 -0.021018 NaN 1.0 0.1 [5 rows x 24 columns]
print(btc_uk_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP']].isnull().sum())
print(eth_uk_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP']].isnull().sum())
interest_rate 0 inflation_rate 0 BTC_Volatility_GBP 72 dtype: int64 interest_rate 0 inflation_rate 0 ETH_Volatility_GBP 72 dtype: int64
btc_uk_data = btc_uk_data.dropna(subset=['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP'])
eth_uk_data = eth_uk_data.dropna(subset=['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP'])
print(btc_uk_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP']].isnull().sum())
print(eth_uk_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP']].isnull().sum())
interest_rate 0 inflation_rate 0 BTC_Volatility_GBP 0 dtype: int64 interest_rate 0 inflation_rate 0 ETH_Volatility_GBP 0 dtype: int64
print(btc_uk_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP']].describe())
print(eth_uk_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP']].describe())
interest_rate inflation_rate BTC_Volatility_GBP count 713.000000 713.000000 713.000000 mean 2.608555 6.340112 0.030181 std 2.063923 2.274132 0.008185 min 0.100000 2.100000 0.015654 25% 0.500000 4.200000 0.024544 50% 2.250000 6.300000 0.029860 75% 5.000000 8.600000 0.034887 max 5.250000 9.600000 0.048875 interest_rate inflation_rate ETH_Volatility_GBP count 713.000000 713.000000 713.000000 mean 2.608555 6.340112 0.038634 std 2.063923 2.274132 0.014402 min 0.100000 2.100000 0.015750 25% 0.500000 4.200000 0.026773 50% 2.250000 6.300000 0.039481 75% 5.000000 8.600000 0.045408 max 5.250000 9.600000 0.073705
btc_uk_data['BTC_Volatility_GBP'] = btc_uk_data['BTC_Volatility_GBP'].clip(upper=1) # Adjust limit as needed
eth_uk_data['ETH_Volatility_GBP'] = eth_uk_data['ETH_Volatility_GBP'].clip(upper=1) # Adjust limit as needed
# Correlation Matrix
btc_corr_uk = btc_uk_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP']].corr()
# Regression Analysis
X_btc = btc_uk_data[['interest_rate', 'inflation_rate']]
y_btc = btc_uk_data['BTC_Volatility_GBP']
reg_btc = LinearRegression().fit(X_btc, y_btc)
btc_pred = reg_btc.predict(X_btc)
btc_r2 = r2_score(y_btc, btc_pred)
# Output the correlation matrix and R2 score
print("Bitcoin Correlation Matrix:\n", btc_corr_uk)
print(f"Bitcoin R2: {btc_r2}")
Bitcoin Correlation Matrix: interest_rate inflation_rate BTC_Volatility_GBP interest_rate 1.000000 0.203298 0.856792 inflation_rate 0.203298 1.000000 0.165234 BTC_Volatility_GBP 0.856792 0.165234 1.000000 Bitcoin R2: 0.7341760223211335
# Correlation Matrix for Ethereum
eth_corr_uk = eth_uk_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP']].corr()
# Prepare independent variables (X) and dependent variable (y)
X_eth = eth_uk_data[['interest_rate', 'inflation_rate']]
y_eth = eth_uk_data['ETH_Volatility_USD']
# Create a linear regression model
reg_eth = LinearRegression().fit(X_eth, y_eth)
# Make predictions using the regression model
eth_pred = reg_eth.predict(X_eth)
# Calculate the R2 score to measure the accuracy of the model
eth_r2 = r2_score(y_eth, eth_pred)
# Output the correlation matrix and R2 score
print("Ethereum Correlation Matrix:\n", eth_corr_uk)
print(f"Ethereum R2: {eth_r2}")
Ethereum Correlation Matrix: interest_rate inflation_rate ETH_Volatility_GBP interest_rate 1.000000 0.203298 0.785631 inflation_rate 0.203298 1.000000 0.221015 ETH_Volatility_GBP 0.785631 0.221015 1.000000 Ethereum R2: 0.6250908661470469
# Visualization function for correlation matrix
def visualize_correlation_matrix(corr_matrix, title):
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title(title)
plt.show()
# Visualizing the correlation matrices
visualize_correlation_matrix(btc_corr_uk, "Bitcoin Correlation Matrix (UK)")
visualize_correlation_matrix(eth_corr_uk, "Ethereum Correlation Matrix (UK)")
# Defining a function to prepare data and run the model
def run_linear_model(dataframe, feature_col, target_col, test_size=0.2, random_state=42):
# Prepare the data
data_clean = dataframe.dropna(subset=[feature_col, target_col]).copy()
X = data_clean[[feature_col]]
y = data_clean[target_col]
# Splitting the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
# Creating and training the model
model = LinearRegression()
model.fit(X_train, y_train)
# Predicting and evaluating
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# Results
return mse, r2
# Adding 30-day rolling averages to both Bitcoin and Ethereum datasets
bitcoin_df['30d_rolling_avg_usd'] = bitcoin_df['Price_USD'].rolling(window=30, min_periods=1).mean()
bitcoin_df['30d_rolling_avg_gbp'] = bitcoin_df['Price_GBP'].rolling(window=30, min_periods=1).mean()
ethereum_df['30d_rolling_avg_usd'] = ethereum_df['Price_USD'].rolling(window=30, min_periods=1).mean()
ethereum_df['30d_rolling_avg_gbp'] = ethereum_df['Price_GBP'].rolling(window=30, min_periods=1).mean()
# Running models for Bitcoin and Ethereum in USD and GBP
mse_btc_usd, r2_btc_usd = run_linear_model(bitcoin_df, '30d_rolling_avg_usd', 'Price_USD')
mse_btc_gbp, r2_btc_gbp = run_linear_model(bitcoin_df, '30d_rolling_avg_gbp', 'Price_GBP')
mse_eth_usd, r2_eth_usd = run_linear_model(ethereum_df, '30d_rolling_avg_usd', 'Price_USD')
mse_eth_gbp, r2_eth_gbp = run_linear_model(ethereum_df, '30d_rolling_avg_gbp', 'Price_GBP')
print("Linear Regression - Results")
print("Bitcoin USD - MSE:", mse_btc_usd, "R^2 Score:", r2_btc_usd)
print("Bitcoin GBP - MSE:", mse_btc_gbp, "R^2 Score:", r2_btc_gbp)
print("Ethereum USD - MSE:", mse_eth_usd, "R^2 Score:", r2_eth_usd)
print("Ethereum GBP - MSE:", mse_eth_gbp, "R^2 Score:", r2_eth_gbp)
Linear Regression - Results Bitcoin USD - MSE: 13721513.737440929 R^2 Score: 0.9135289380688452 Bitcoin GBP - MSE: 7615990.113462368 R^2 Score: 0.8981790657290526 Ethereum USD - MSE: 91914.67379741861 R^2 Score: 0.8691018986344661 Ethereum GBP - MSE: 50048.292438045515 R^2 Score: 0.8517974779901151
def run_random_forest_model(dataframe, feature_cols, target_col, random_state=0):
# Re-loading and preparing the data
dataframe['30d_rolling_avg'] = dataframe[target_col].rolling(window=30, min_periods=1).mean()
dataframe['30d_volatility'] = dataframe[target_col].rolling(window=30, min_periods=1).std()
# Filling any remaining NaN values with the mean of the column
dataframe.fillna(dataframe.mean(), inplace=True)
# Selecting features and re-standardizing
scaler = StandardScaler()
X = dataframe[feature_cols]
X_scaled = scaler.fit_transform(X)
y = dataframe[target_col]
# Splitting the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=random_state)
# Fitting the Random Forest Model
rf_model = RandomForestRegressor(n_estimators=100, random_state=random_state)
rf_model.fit(X_train, y_train)
# Predicting and evaluating
y_pred_rf = rf_model.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)
# Return MSE and R² score
return mse_rf, r2_rf
# Define the feature columns common to all model runs
feature_cols = ['30d_rolling_avg', '30d_volatility', 'Vol._USD']
mse_btc_usd_rf, r2_btc_usd_rf = run_random_forest_model(bitcoin_df, feature_cols, 'Price_USD')
mse_btc_gbp_rf, r2_btc_gbp_rf = run_random_forest_model(bitcoin_df, feature_cols, 'Price_GBP')
mse_eth_usd_rf, r2_eth_usd_rf = run_random_forest_model(ethereum_df, feature_cols, 'Price_USD')
mse_eth_gbp_rf, r2_eth_gbp_rf = run_random_forest_model(ethereum_df, feature_cols, 'Price_GBP')
print("Random Forest - Results")
print("Bitcoin USD - RF MSE:", mse_btc_usd_rf, "RF R2 Score:", r2_btc_usd_rf)
print("Bitcoin GBP - RF MSE:", mse_btc_gbp_rf, "RF R2 Score:", r2_btc_gbp_rf)
print("Ethereum USD - RF MSE:", mse_eth_usd_rf, "RF R2 Score:", r2_eth_usd_rf)
print("Ethereum GBP - RF MSE:", mse_eth_gbp_rf, "RF R2 Score:", r2_eth_gbp_rf)
Random Forest - Results Bitcoin USD - RF MSE: 9751803.281279217 RF R2 Score: 0.9454346077877172 Bitcoin GBP - RF MSE: 5342113.424420669 RF R2 Score: 0.9359301270113953 Ethereum USD - RF MSE: 72238.21106403082 RF R2 Score: 0.9065374373570333 Ethereum GBP - RF MSE: 47663.14455619989 RF R2 Score: 0.8707663248926976
# Load the datasets
bitcoin_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_bitcoin_dataset.csv')
ethereum_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_ethereum_dataset.csv')
# Convert 'Date' column to datetime format for both datasets
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'])
ethereum_df['Date'] = pd.to_datetime(ethereum_df['Date'])
def apply_fourier_transform(df, price_column):
# Number of sample points
N = df.shape[0]
# Sample spacing
T = 1.0 # assuming daily data, the sample spacing is 1 day
x = df[price_column]
yf = np.fft.fft(x)
xf = np.fft.fftfreq(N, T)[:N//2]
plt.figure(figsize=(14, 7))
plt.plot(xf, 2.0/N * np.abs(yf[0:N//2]))
plt.grid()
plt.title(f'Fourier Transform - Frequency Components of {price_column}')
plt.xlabel('Frequency (1/day)')
plt.ylabel('Amplitude')
plt.show()
# Apply Fourier Transform to Bitcoin and Ethereum price data
apply_fourier_transform(bitcoin_df, 'Price')
apply_fourier_transform(ethereum_df, 'Price')
def prepare_data(df, price_column, sequence_length):
data = df[[price_column]].values
scaler = MinMaxScaler(feature_range=(0, 1))
data_normalized = scaler.fit_transform(data)
x, y = [], []
for i in range(sequence_length, len(data_normalized)):
x.append(data_normalized[i-sequence_length:i])
y.append(data_normalized[i, 0])
x, y = np.array(x), np.array(y)
# Split into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, shuffle=False)
return x_train, y_train, x_test, y_test, scaler
# Prepare data
sequence_length = 50
x_train_btc, y_train_btc, x_test_btc, y_test_btc, scaler_btc = prepare_data(bitcoin_df, 'Price', sequence_length)
x_train_eth, y_train_eth, x_test_eth, y_test_eth, scaler_eth = prepare_data(ethereum_df, 'Price', sequence_length)
def build_lstm_model(input_shape):
model = Sequential([
LSTM(50, return_sequences=True, input_shape=(input_shape[0], input_shape[1])),
Dropout(0.2),
LSTM(50, return_sequences=False),
Dropout(0.2),
Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
return model
# Input shape correction
model_btc = build_lstm_model((x_train_btc.shape[1], x_train_btc.shape[2]))
model_eth = build_lstm_model((x_train_eth.shape[1], x_train_eth.shape[2]))
# Training the model
history_btc = model_btc.fit(x_train_btc, y_train_btc, epochs=50, batch_size=32, validation_data=(x_test_btc, y_test_btc), verbose=1)
history_eth = model_eth.fit(x_train_eth, y_train_eth, epochs=50, batch_size=32, validation_data=(x_test_eth, y_test_eth), verbose=1)
C:\Users\apoor_b31k2hq\AppData\Roaming\Python\Python39\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
Epoch 1/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 8s 102ms/step - loss: 0.0459 - val_loss: 0.0068 Epoch 2/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 27ms/step - loss: 0.0056 - val_loss: 0.0062 Epoch 3/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0038 - val_loss: 0.0063 Epoch 4/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 77ms/step - loss: 0.0035 - val_loss: 0.0056 Epoch 5/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0041 - val_loss: 0.0056 Epoch 6/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 74ms/step - loss: 0.0034 - val_loss: 0.0051 Epoch 7/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 69ms/step - loss: 0.0031 - val_loss: 0.0049 Epoch 8/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 66ms/step - loss: 0.0030 - val_loss: 0.0050 Epoch 9/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - loss: 0.0033 - val_loss: 0.0045 Epoch 10/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0032 - val_loss: 0.0066 Epoch 11/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 54ms/step - loss: 0.0028 - val_loss: 0.0060 Epoch 12/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 67ms/step - loss: 0.0030 - val_loss: 0.0040 Epoch 13/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 45ms/step - loss: 0.0025 - val_loss: 0.0040 Epoch 14/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0031 - val_loss: 0.0042 Epoch 15/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 63ms/step - loss: 0.0026 - val_loss: 0.0041 Epoch 16/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 66ms/step - loss: 0.0025 - val_loss: 0.0036 Epoch 17/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 72ms/step - loss: 0.0025 - val_loss: 0.0036 Epoch 18/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 72ms/step - loss: 0.0021 - val_loss: 0.0054 Epoch 19/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 59ms/step - loss: 0.0030 - val_loss: 0.0033 Epoch 20/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 97ms/step - loss: 0.0021 - val_loss: 0.0038 Epoch 21/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 78ms/step - loss: 0.0024 - val_loss: 0.0042 Epoch 22/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 46ms/step - loss: 0.0023 - val_loss: 0.0032 Epoch 23/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0022 - val_loss: 0.0032 Epoch 24/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 65ms/step - loss: 0.0021 - val_loss: 0.0031 Epoch 25/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - loss: 0.0018 - val_loss: 0.0031 Epoch 26/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0022 - val_loss: 0.0031 Epoch 27/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 55ms/step - loss: 0.0020 - val_loss: 0.0043 Epoch 28/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 74ms/step - loss: 0.0027 - val_loss: 0.0029 Epoch 29/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 70ms/step - loss: 0.0021 - val_loss: 0.0029 Epoch 30/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 68ms/step - loss: 0.0023 - val_loss: 0.0031 Epoch 31/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 80ms/step - loss: 0.0021 - val_loss: 0.0039 Epoch 32/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 78ms/step - loss: 0.0024 - val_loss: 0.0029 Epoch 33/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 82ms/step - loss: 0.0018 - val_loss: 0.0037 Epoch 34/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 45ms/step - loss: 0.0019 - val_loss: 0.0026 Epoch 35/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0019 - val_loss: 0.0047 Epoch 36/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 58ms/step - loss: 0.0020 - val_loss: 0.0026 Epoch 37/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 73ms/step - loss: 0.0018 - val_loss: 0.0027 Epoch 38/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 85ms/step - loss: 0.0018 - val_loss: 0.0024 Epoch 39/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 85ms/step - loss: 0.0016 - val_loss: 0.0024 Epoch 40/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 75ms/step - loss: 0.0016 - val_loss: 0.0024 Epoch 41/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 77ms/step - loss: 0.0017 - val_loss: 0.0028 Epoch 42/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 88ms/step - loss: 0.0020 - val_loss: 0.0032 Epoch 43/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 79ms/step - loss: 0.0020 - val_loss: 0.0027 Epoch 44/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 97ms/step - loss: 0.0018 - val_loss: 0.0023 Epoch 45/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 82ms/step - loss: 0.0014 - val_loss: 0.0024 Epoch 46/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 87ms/step - loss: 0.0022 - val_loss: 0.0026 Epoch 47/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 83ms/step - loss: 0.0017 - val_loss: 0.0022 Epoch 48/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 66ms/step - loss: 0.0017 - val_loss: 0.0032 Epoch 49/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0021 - val_loss: 0.0023 Epoch 50/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - loss: 0.0019 - val_loss: 0.0022 Epoch 1/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 7s 106ms/step - loss: 0.0706 - val_loss: 0.0119 Epoch 2/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 78ms/step - loss: 0.0066 - val_loss: 0.0120 Epoch 3/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 77ms/step - loss: 0.0052 - val_loss: 0.0103 Epoch 4/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 41ms/step - loss: 0.0046 - val_loss: 0.0119 Epoch 5/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0054 - val_loss: 0.0095 Epoch 6/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 55ms/step - loss: 0.0036 - val_loss: 0.0091 Epoch 7/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 41ms/step - loss: 0.0036 - val_loss: 0.0084 Epoch 8/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0037 - val_loss: 0.0093 Epoch 9/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0038 - val_loss: 0.0079 Epoch 10/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0035 - val_loss: 0.0078 Epoch 11/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 73ms/step - loss: 0.0031 - val_loss: 0.0075 Epoch 12/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 42ms/step - loss: 0.0030 - val_loss: 0.0074 Epoch 13/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0031 - val_loss: 0.0073 Epoch 14/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 56ms/step - loss: 0.0038 - val_loss: 0.0077 Epoch 15/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 79ms/step - loss: 0.0027 - val_loss: 0.0066 Epoch 16/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 80ms/step - loss: 0.0030 - val_loss: 0.0065 Epoch 17/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 84ms/step - loss: 0.0032 - val_loss: 0.0064 Epoch 18/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 81ms/step - loss: 0.0026 - val_loss: 0.0061 Epoch 19/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 68ms/step - loss: 0.0030 - val_loss: 0.0064 Epoch 20/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 63ms/step - loss: 0.0028 - val_loss: 0.0060 Epoch 21/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 64ms/step - loss: 0.0029 - val_loss: 0.0057 Epoch 22/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 64ms/step - loss: 0.0025 - val_loss: 0.0056 Epoch 23/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 45ms/step - loss: 0.0026 - val_loss: 0.0054 Epoch 24/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0024 - val_loss: 0.0052 Epoch 25/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0028 - val_loss: 0.0053 Epoch 26/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 65ms/step - loss: 0.0031 - val_loss: 0.0052 Epoch 27/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 70ms/step - loss: 0.0030 - val_loss: 0.0049 Epoch 28/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 70ms/step - loss: 0.0026 - val_loss: 0.0046 Epoch 29/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0024 - val_loss: 0.0053 Epoch 30/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 68ms/step - loss: 0.0025 - val_loss: 0.0046 Epoch 31/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 75ms/step - loss: 0.0026 - val_loss: 0.0043 Epoch 32/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 41ms/step - loss: 0.0023 - val_loss: 0.0043 Epoch 33/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0022 - val_loss: 0.0043 Epoch 34/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 55ms/step - loss: 0.0021 - val_loss: 0.0047 Epoch 35/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 65ms/step - loss: 0.0025 - val_loss: 0.0040 Epoch 36/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 84ms/step - loss: 0.0023 - val_loss: 0.0043 Epoch 37/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0023 - val_loss: 0.0045 Epoch 38/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 74ms/step - loss: 0.0027 - val_loss: 0.0040 Epoch 39/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 80ms/step - loss: 0.0023 - val_loss: 0.0038 Epoch 40/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 73ms/step - loss: 0.0021 - val_loss: 0.0037 Epoch 41/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 41ms/step - loss: 0.0021 - val_loss: 0.0037 Epoch 42/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0022 - val_loss: 0.0036 Epoch 43/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0020 - val_loss: 0.0037 Epoch 44/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - loss: 0.0020 - val_loss: 0.0035 Epoch 45/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0023 - val_loss: 0.0036 Epoch 46/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 58ms/step - loss: 0.0019 - val_loss: 0.0035 Epoch 47/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 80ms/step - loss: 0.0021 - val_loss: 0.0037 Epoch 48/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 79ms/step - loss: 0.0019 - val_loss: 0.0035 Epoch 49/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 83ms/step - loss: 0.0019 - val_loss: 0.0033 Epoch 50/50 27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 72ms/step - loss: 0.0015 - val_loss: 0.0036
def evaluate_model(model, x_test, y_test, scaler):
# Predict the prices using the model
predicted_prices = model.predict(x_test)
# Inverse transform to get actual prices
predicted_prices = scaler.inverse_transform(predicted_prices)
actual_prices = scaler.inverse_transform(y_test.reshape(-1, 1))
# Calculate the evaluation metrics
mse = mean_squared_error(actual_prices, predicted_prices)
mae = mean_absolute_error(actual_prices, predicted_prices)
rmse = np.sqrt(mse)
return predicted_prices, actual_prices, mse, mae, rmse
# Evaluate Bitcoin model
predicted_btc, actual_btc, mse_btc, mae_btc, rmse_btc = evaluate_model(model_btc, x_test_btc, y_test_btc, scaler_btc)
# Evaluate Ethereum model
predicted_eth, actual_eth, mse_eth, mae_eth, rmse_eth = evaluate_model(model_eth, x_test_eth, y_test_eth, scaler_eth)
print(f"Bitcoin Model - MSE: {mse_btc}, MAE: {mae_btc}, RMSE: {rmse_btc}")
print(f"Ethereum Model - MSE: {mse_eth}, MAE: {mae_eth}, RMSE: {rmse_eth}")
7/7 ━━━━━━━━━━━━━━━━━━━━ 1s 91ms/step 7/7 ━━━━━━━━━━━━━━━━━━━━ 1s 101ms/step Bitcoin Model - MSE: 32778201485.554764, MAE: 146029.65714285715, RMSE: 181047.51168009674 Ethereum Model - MSE: 280453667.4000361, MAE: 12771.882961309524, RMSE: 16746.75095055862
def plot_interactive_predictions(actual, predicted, title):
fig = go.Figure()
# Add actual price trace
fig.add_trace(go.Scatter(x=list(range(len(actual))), y=actual.flatten(), mode='lines', name='Actual Price'))
# Add predicted price trace
fig.add_trace(go.Scatter(x=list(range(len(predicted))), y=predicted.flatten(), mode='lines', name='Predicted Price'))
# Update layout
fig.update_layout(title=title, xaxis_title='Time', yaxis_title='Price',
legend_title="Legend", template="plotly_white")
fig.show()
# Plot Bitcoin and Ethereum predictions interactively
plot_interactive_predictions(actual_btc, predicted_btc, 'Interactive Bitcoin Price Prediction')
plot_interactive_predictions(actual_eth, predicted_eth, 'Interactive Ethereum Price Prediction')