Libraries needed¶

In [1]:
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler

Data Cleaning¶

Bitcoin - GBP¶

In [2]:
df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\BTC_GBP Coinbase Pro Historical Data.csv')

print(df.head())

df.dtypes
         Date     Price      Open      High       Low   Vol. Change %
0  03/01/2024  49,286.4  48,428.9  49,877.9  48,087.1  0.29K    1.77%
1  02/29/2024  48,428.9  48,291.2  49,826.0  46,392.2  0.55K    0.34%
2  02/28/2024  48,266.6  44,822.7  50,332.0  44,600.0  0.72K    7.72%
3  02/27/2024  44,808.9  42,720.5  45,120.0  41,954.2  0.36K    4.90%
4  02/26/2024  42,715.9  40,829.1  43,192.9  40,129.8  0.27K    4.62%
Out[2]:
Date        object
Price       object
Open        object
High        object
Low         object
Vol.        object
Change %    object
dtype: object
In [3]:
df.dtypes
df['Price'] = df['Price'].astype(str).str.replace(',', '').astype(float)
df['Open'] = df['Open'].astype(str).str.replace(',', '').astype(float)
df['High'] = df['High'].astype(str).str.replace(',', '').astype(float)
df['Low'] = df['Low'].astype(str).str.replace(',', '').astype(float)

# Replace the percentage sign and convert to float
df['Change %'] = df['Change %'].astype(str).str.replace('%', '').astype(float)

#Change vol - K and M
def convert_volume_to_numeric(vol):
    if pd.isna(vol):
        return None
    vol = str(vol).upper().replace(',', '')
    if 'K' in vol:
        return float(vol.replace('K', '')) * 1e3
    elif 'M' in vol:
        return float(vol.replace('M', '')) * 1e6
    else:
        return float(vol)

df['Vol.'] = df['Vol.'].apply(convert_volume_to_numeric)


# Change the date datatype: Object to Datetime
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')

# Display the cleaned DataFrame
print(df)
df.dtypes
           Date    Price     Open     High      Low    Vol.  Change %
0    2024-03-01  49286.4  48428.9  49877.9  48087.1   290.0      1.77
1    2024-02-29  48428.9  48291.2  49826.0  46392.2   550.0      0.34
2    2024-02-28  48266.6  44822.7  50332.0  44600.0   720.0      7.72
3    2024-02-27  44808.9  42720.5  45120.0  41954.2   360.0      4.90
4    2024-02-26  42715.9  40829.1  43192.9  40129.8   270.0      4.62
...         ...      ...      ...      ...      ...     ...       ...
1092 2021-03-05  35229.0  34857.7  35229.0  34106.6  1340.0      1.06
1093 2021-03-04  34858.0  36240.0  36845.8  34836.3  1430.0     -3.91
1094 2021-03-03  36276.8  34767.1  37587.9  34767.1  1260.0      4.32
1095 2021-03-02  34774.1  35656.3  35656.3  34150.0   950.0     -2.47
1096 2021-03-01  35656.3  32380.0  35656.3  32380.0  1360.0     10.15

[1097 rows x 7 columns]
Out[3]:
Date        datetime64[ns]
Price              float64
Open               float64
High               float64
Low                float64
Vol.               float64
Change %           float64
dtype: object

Bitcoin - USD¶

In [4]:
bit_usd = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\BTC_USD Bitfinex Historical Data.csv')

print(bit_usd.head())

bit_usd.dtypes
         Date     Price      Open      High       Low   Vol. Change %
0  03/01/2024  62,346.0  61,148.0  63,000.0  60,783.0  2.51K    1.96%
1  02/29/2024  61,148.0  62,380.0  63,566.0  60,385.0  4.10K   -1.97%
2  02/28/2024  62,380.0  56,998.0  63,780.0  56,668.0  6.63K    9.44%
3  02/27/2024  56,998.0  54,417.0  57,487.0  54,388.0  4.50K    4.74%
4  02/26/2024  54,417.0  51,716.0  54,720.0  50,903.0  4.39K    5.22%
Out[4]:
Date        object
Price       object
Open        object
High        object
Low         object
Vol.        object
Change %    object
dtype: object
In [5]:
bit_usd['Price'] = bit_usd['Price'].astype(str).str.replace(',', '').astype(float)
bit_usd['Open'] = bit_usd['Open'].astype(str).str.replace(',', '').astype(float)
bit_usd['High'] = bit_usd['High'].astype(str).str.replace(',', '').astype(float)
bit_usd['Low'] = bit_usd['Low'].astype(str).str.replace(',', '').astype(float)

# Replace the percentage sign and convert to float
bit_usd['Change %'] = bit_usd['Change %'].astype(str).str.replace('%', '').astype(float)

#Change vol - K and M
def convert_volume_to_numeric(vol):
    if pd.isna(vol):
        return None
    vol = str(vol).upper().replace(',', '')
    if 'K' in vol:
        return float(vol.replace('K', '')) * 1e3
    elif 'M' in vol:
        return float(vol.replace('M', '')) * 1e6
    else:
        return float(vol)

bit_usd['Vol.'] = bit_usd['Vol.'].apply(convert_volume_to_numeric)

# Change the date datatype from Object to Datetime
bit_usd['Date'] = pd.to_datetime(bit_usd['Date'], format='%m/%d/%Y')

# Display the cleaned DataFrame
print(bit_usd)

# Display the data types again to confirm changes
print(bit_usd.dtypes)
           Date    Price     Open     High      Low    Vol.  Change %
0    2024-03-01  62346.0  61148.0  63000.0  60783.0  2510.0      1.96
1    2024-02-29  61148.0  62380.0  63566.0  60385.0  4100.0     -1.97
2    2024-02-28  62380.0  56998.0  63780.0  56668.0  6630.0      9.44
3    2024-02-27  56998.0  54417.0  57487.0  54388.0  4500.0      4.74
4    2024-02-26  54417.0  51716.0  54720.0  50903.0  4390.0      5.22
...         ...      ...      ...      ...      ...     ...       ...
1092 2021-03-05  48811.0  48368.0  49277.0  46662.0  9420.0      0.92
1093 2021-03-04  48368.0  50425.0  51556.0  48059.0  9560.0     -4.08
1094 2021-03-03  50425.0  48526.0  52018.0  48526.0  8600.0      3.91
1095 2021-03-02  48526.0  49572.0  49572.0  47453.6  8370.0     -2.11
1096 2021-03-01  49572.0  45300.0  49572.0  45300.0  9420.0      9.43

[1097 rows x 7 columns]
Date        datetime64[ns]
Price              float64
Open               float64
High               float64
Low                float64
Vol.               float64
Change %           float64
dtype: object

Bitcoin - INR¶

In [6]:
bit_inr = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\BTC_INR Synthetic Historical Data.csv')

print(bit_inr.head())

bit_inr.dtypes
         Date      Price       Open       High        Low  Vol. Change %
0  03/01/2024  5,170,624  5,071,696  5,227,076  5,036,433   NaN    1.91%
1  02/29/2024  5,073,948  5,176,916  5,279,115  5,017,673   NaN   -1.98%
2  02/28/2024  5,176,321  4,731,160  5,296,428  4,699,779   NaN    9.40%
3  02/27/2024  4,731,766  4,513,558  4,768,902  4,512,017   NaN    4.82%
4  02/26/2024  4,514,234  4,287,393  4,548,782  4,221,497   NaN    5.28%
Out[6]:
Date         object
Price        object
Open         object
High         object
Low          object
Vol.        float64
Change %     object
dtype: object
In [7]:
bit_inr['Price'] = bit_inr['Price'].astype(str).str.replace(',', '').astype(float)
bit_inr['Open'] = bit_inr['Open'].astype(str).str.replace(',', '').astype(float)
bit_inr['High'] = bit_inr['High'].astype(str).str.replace(',', '').astype(float)
bit_inr['Low'] = bit_inr['Low'].astype(str).str.replace(',', '').astype(float)

# Replace the percentage sign and convert to float
bit_inr['Change %'] = bit_inr['Change %'].astype(str).str.replace('%', '').astype(float)

# Change the date datatype from Object to Datetime
bit_inr['Date'] = pd.to_datetime(bit_inr['Date'], format='%m/%d/%Y')

# Display the cleaned DataFrame
print(bit_inr)

# Display the data types again to confirm changes
print(bit_inr.dtypes)
           Date      Price       Open       High        Low  Vol.  Change %
0    2024-03-01  5170624.0  5071696.0  5227076.0  5036433.0   NaN      1.91
1    2024-02-29  5073948.0  5176916.0  5279115.0  5017673.0   NaN     -1.98
2    2024-02-28  5176321.0  4731160.0  5296428.0  4699779.0   NaN      9.40
3    2024-02-27  4731766.0  4513558.0  4768902.0  4512017.0   NaN      4.82
4    2024-02-26  4514234.0  4287393.0  4548782.0  4221497.0   NaN      5.28
...         ...        ...        ...        ...        ...   ...       ...
1092 2021-03-05  3575587.0  3536499.0  3617104.0  3388905.0   NaN      1.00
1093 2021-03-04  3540226.0  3675759.0  3777031.0  3456393.0   NaN     -3.82
1094 2021-03-03  3680660.0  3545733.0  3824793.0  3527003.0   NaN      3.84
1095 2021-03-02  3544437.0  3639517.0  3683463.0  3452170.0   NaN     -2.60
1096 2021-03-01  3639207.0  3342246.0  3652659.0  3327820.0   NaN      9.29

[1097 rows x 7 columns]
Date        datetime64[ns]
Price              float64
Open               float64
High               float64
Low                float64
Vol.               float64
Change %           float64
dtype: object

Ethereum - GBP¶

In [8]:
eth_gbp = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\ETH_GBP Coinbase Pro Historical Data.csv')

print(eth_gbp.head())

eth_gbp.dtypes
         Date     Price      Open      High       Low   Vol. Change %
0  03/01/2024  2,712.34  2,645.76  2,730.76  2,643.83  1.84K    2.51%
1  02/29/2024  2,646.04  2,617.37  2,772.57  2,570.71  4.62K    1.11%
2  02/28/2024  2,616.93  2,546.90  2,748.91  2,534.89  4.35K    2.75%
3  02/27/2024  2,546.90  2,490.22  2,581.24  2,409.79  2.89K    2.15%
4  02/26/2024  2,493.37  2,456.94  2,515.03  2,392.68  2.45K    1.48%
Out[8]:
Date        object
Price       object
Open        object
High        object
Low         object
Vol.        object
Change %    object
dtype: object
In [9]:
eth_gbp['Price'] = eth_gbp['Price'].astype(str).str.replace(',', '').astype(float)
eth_gbp['Open'] = eth_gbp['Open'].astype(str).str.replace(',', '').astype(float)
eth_gbp['High'] = eth_gbp['High'].astype(str).str.replace(',', '').astype(float)
eth_gbp['Low'] = eth_gbp['Low'].astype(str).str.replace(',', '').astype(float)

# Replace the percentage sign and convert to float
eth_gbp['Change %'] = eth_gbp['Change %'].astype(str).str.replace('%', '').astype(float)

# Convert 'Vol.' from K and M to numeric
def convert_volume_to_numeric(vol):
    if pd.isna(vol):
        return None
    vol = str(vol).upper().replace(',', '')
    if 'K' in vol:
        return float(vol.replace('K', '')) * 1e3
    elif 'M' in vol:
        return float(vol.replace('M', '')) * 1e6
    else:
        return float(vol)

eth_gbp['Vol.'] = eth_gbp['Vol.'].apply(convert_volume_to_numeric)

# Change the date datatype from Object to Datetime
eth_gbp['Date'] = pd.to_datetime(eth_gbp['Date'], format='%m/%d/%Y')

# Display the cleaned DataFrame
print(eth_gbp)

# Display the data types again to confirm changes
print(eth_gbp.dtypes)
           Date    Price     Open     High      Low     Vol.  Change %
0    2024-03-01  2712.34  2645.76  2730.76  2643.83   1840.0      2.51
1    2024-02-29  2646.04  2617.37  2772.57  2570.71   4620.0      1.11
2    2024-02-28  2616.93  2546.90  2748.91  2534.89   4350.0      2.75
3    2024-02-27  2546.90  2490.22  2581.24  2409.79   2890.0      2.15
4    2024-02-26  2493.37  2456.94  2515.03  2392.68   2450.0      1.48
...         ...      ...      ...      ...      ...      ...       ...
1092 2021-03-05  1103.72  1109.56  1109.56  1066.83  16360.0     -0.53
1093 2021-03-04  1109.56  1128.71  1151.74  1109.56  14850.0     -1.70
1094 2021-03-03  1128.71  1067.56  1179.95  1067.56  15660.0      5.81
1095 2021-03-02  1066.76  1129.25  1129.25  1055.80  17420.0     -5.51
1096 2021-03-01  1129.00  1016.71  1129.00  1016.71  18170.0     10.93

[1097 rows x 7 columns]
Date        datetime64[ns]
Price              float64
Open               float64
High               float64
Low                float64
Vol.               float64
Change %           float64
dtype: object

Ethereum - USD¶

In [10]:
eth_usd = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Ethereum Historical Data.csv')

print(eth_usd.head())

eth_usd.dtypes
         Date     Price      Open      High       Low     Vol. Change %
0  03/01/2024  3,433.51  3,339.25  3,449.56  3,339.24  431.98K    2.82%
1  02/29/2024  3,339.26  3,382.98  3,520.80  3,300.42  802.17K   -1.29%
2  02/28/2024  3,383.03  3,242.44  3,482.48  3,183.92    1.01M    4.35%
3  02/27/2024  3,242.14  3,175.94  3,285.01  3,165.91  674.56K    2.08%
4  02/26/2024  3,175.94  3,112.59  3,193.67  3,038.00  582.39K    2.04%
Out[10]:
Date        object
Price       object
Open        object
High        object
Low         object
Vol.        object
Change %    object
dtype: object
In [11]:
eth_usd['Price'] = eth_usd['Price'].astype(str).str.replace(',', '').astype(float)
eth_usd['Open'] = eth_usd['Open'].astype(str).str.replace(',', '').astype(float)
eth_usd['High'] = eth_usd['High'].astype(str).str.replace(',', '').astype(float)
eth_usd['Low'] = eth_usd['Low'].astype(str).str.replace(',', '').astype(float)

# Replace the percentage sign and convert to float
eth_usd['Change %'] = eth_usd['Change %'].astype(str).str.replace('%', '').astype(float)

# Convert 'Vol.' from K and M to numeric
def convert_volume_to_numeric(vol):
    if pd.isna(vol):
        return None
    vol = str(vol).upper().replace(',', '')
    if 'K' in vol:
        return float(vol.replace('K', '')) * 1e3
    elif 'M' in vol:
        return float(vol.replace('M', '')) * 1e6
    elif 'B' in vol:
        return float(vol.replace('B', '')) * 1e9
    else:
        return float(vol)

eth_usd['Vol.'] = eth_usd['Vol.'].apply(convert_volume_to_numeric)

# Change the date datatype from Object to Datetime
eth_usd['Date'] = pd.to_datetime(eth_usd['Date'], format='%m/%d/%Y')

# Display the cleaned DataFrame
print(eth_usd)

# Display the data types again to confirm changes
print(eth_usd.dtypes)
           Date    Price     Open     High      Low       Vol.  Change %
0    2024-03-01  3433.51  3339.25  3449.56  3339.24   431980.0      2.82
1    2024-02-29  3339.26  3382.98  3520.80  3300.42   802170.0     -1.29
2    2024-02-28  3383.03  3242.44  3482.48  3183.92  1010000.0      4.35
3    2024-02-27  3242.14  3175.94  3285.01  3165.91   674560.0      2.08
4    2024-02-26  3175.94  3112.59  3193.67  3038.00   582390.0      2.04
...         ...      ...      ...      ...      ...        ...       ...
1092 2021-03-05  1530.29  1539.73  1548.71  1443.32  1670000.0     -0.54
1093 2021-03-04  1538.61  1567.84  1623.73  1506.86  1700000.0     -2.06
1094 2021-03-03  1571.05  1488.80  1653.04  1477.72  1170000.0      5.54
1095 2021-03-02  1488.62  1570.00  1602.13  1457.22  1740000.0     -5.18
1096 2021-03-01  1570.03  1418.91  1571.15  1411.55  1920000.0     10.66

[1097 rows x 7 columns]
Date        datetime64[ns]
Price              float64
Open               float64
High               float64
Low                float64
Vol.               float64
Change %           float64
dtype: object

Ethereum - INR¶

In [12]:
eth_inr = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\ETH_INR Synthetic Historical Data.csv')

print(eth_inr.head())

eth_inr.dtypes
         Date    Price     Open     High      Low  Vol. Change %
0  03/01/2024  284,443  277,151  285,858  276,932   NaN    2.69%
1  02/29/2024  276,986  280,510  291,857  273,646   NaN   -1.26%
2  02/28/2024  280,525  268,963  288,714  263,953   NaN    4.27%
3  02/27/2024  269,047  263,240  272,341  262,357   NaN    2.24%
4  02/26/2024  263,146  258,124  264,699  251,787   NaN    1.98%
Out[12]:
Date         object
Price        object
Open         object
High         object
Low          object
Vol.        float64
Change %     object
dtype: object
In [13]:
eth_inr['Price'] = eth_inr['Price'].astype(str).str.replace(',', '').astype(float)
eth_inr['Open'] = eth_inr['Open'].astype(str).str.replace(',', '').astype(float)
eth_inr['High'] = eth_inr['High'].astype(str).str.replace(',', '').astype(float)
eth_inr['Low'] = eth_inr['Low'].astype(str).str.replace(',', '').astype(float)

# Replace the percentage sign and convert to float
eth_inr['Change %'] = eth_inr['Change %'].astype(str).str.replace('%', '').astype(float)

# Change the date datatype from Object to Datetime
eth_inr['Date'] = pd.to_datetime(eth_inr['Date'], format='%m/%d/%Y')

# Display the cleaned DataFrame
print(eth_inr)

# Display the data types again to confirm changes
print(eth_inr.dtypes)
           Date     Price      Open      High       Low  Vol.  Change %
0    2024-03-01  284443.0  277151.0  285858.0  276932.0   NaN      2.69
1    2024-02-29  276986.0  280510.0  291857.0  273646.0   NaN     -1.26
2    2024-02-28  280525.0  268963.0  288714.0  263953.0   NaN      4.27
3    2024-02-27  269047.0  263240.0  272341.0  262357.0   NaN      2.24
4    2024-02-26  263146.0  258124.0  264699.0  251787.0   NaN      1.98
...         ...       ...       ...       ...       ...   ...       ...
1092 2021-03-05  112137.0  112686.0  113266.0  105354.0   NaN     -0.30
1093 2021-03-04  112469.0  114257.0  118471.0  109596.0   NaN     -1.93
1094 2021-03-03  114688.0  108800.0  120400.0  108254.0   NaN      5.04
1095 2021-03-02  109183.0  115186.0  117553.0  106746.0   NaN     -5.27
1096 2021-03-01  115252.0  105213.0  115301.0  104118.0   NaN     10.03

[1097 rows x 7 columns]
Date        datetime64[ns]
Price              float64
Open               float64
High               float64
Low                float64
Vol.               float64
Change %           float64
dtype: object

Merging the datasets¶

In [14]:
# Start by merging the first two
#merged_df = pd.merge(df, bit_usd, on='Date', how='outer')

# Continue merging the rest one by one
#merged_df = pd.merge(merged_df, bit_inr, on='Date', how='outer')


# Once all are merged, save to CSV
#merged_df.to_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_bitcoin_dataset.csv', index=False)
In [15]:
# merged_df1 = pd.merge(eth_usd, eth_gbp, on='Date', how='outer')
# merged_df1 = pd.merge(merged_df1, eth_inr, on='Date', how='outer')
# merged_df1.to_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_ethereum_dataset.csv', index=False)
In [16]:
bitcoin_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_bitcoin_dataset.csv') 
rename_dict = {
    'Price_x': 'Price_GBP', 
    'Price_y': 'Price_USD', 
    'Price': 'Price_INR',
    'Open_x': 'Open_GBP',
    'High_x': 'High_GBP',
    'Low_x': 'Low_GBP',
    'Vol._x': 'Vol._GBP',
    'Change %_x': 'Change %_GBP',
    'Open_y': 'Open_USD',
    'High_y': 'High_USD',
    'Low_y': 'Low_USD',
    'Vol._y': 'Vol._USD',
    'Change %_y': 'Change %_USD',
    'Open': 'Open_INR',
    'High': 'High_INR',
    'Low': 'Low_INR',
    'Vol.': 'Vol._INR',
    'Change %': 'Change %_INR'
}

# Use the rename method to update the column names
bitcoin_df.rename(columns=rename_dict, inplace=True)

print(bitcoin_df.head())
         Date  Price_GBP  Open_GBP  High_GBP  Low_GBP  Vol._GBP  Change %_GBP  \
0  2024-03-01    49286.4   48428.9   49877.9  48087.1     290.0          1.77   
1  2024-02-29    48428.9   48291.2   49826.0  46392.2     550.0          0.34   
2  2024-02-28    48266.6   44822.7   50332.0  44600.0     720.0          7.72   
3  2024-02-27    44808.9   42720.5   45120.0  41954.2     360.0          4.90   
4  2024-02-26    42715.9   40829.1   43192.9  40129.8     270.0          4.62   

   Price_USD  Open_USD  High_USD  Low_USD  Vol._USD  Change %_USD  Price_INR  \
0    62346.0   61148.0   63000.0  60783.0    2510.0          1.96  5170624.0   
1    61148.0   62380.0   63566.0  60385.0    4100.0         -1.97  5073948.0   
2    62380.0   56998.0   63780.0  56668.0    6630.0          9.44  5176321.0   
3    56998.0   54417.0   57487.0  54388.0    4500.0          4.74  4731766.0   
4    54417.0   51716.0   54720.0  50903.0    4390.0          5.22  4514234.0   

    Open_INR   High_INR    Low_INR  Vol._INR  Change %_INR  
0  5071696.0  5227076.0  5036433.0       NaN          1.91  
1  5176916.0  5279115.0  5017673.0       NaN         -1.98  
2  4731160.0  5296428.0  4699779.0       NaN          9.40  
3  4513558.0  4768902.0  4512017.0       NaN          4.82  
4  4287393.0  4548782.0  4221497.0       NaN          5.28  
In [17]:
ethereum_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_ethereum_dataset.csv')

rename_dict1 = {
    'Price_x': 'Price_USD', 
    'Price_y': 'Price_GBP', 
    'Price': 'Price_INR',
    'Open_x': 'Open_USD',
    'High_x': 'High_USD',
    'Low_x': 'Low_USD',
    'Vol._x': 'Vol._USD',
    'Change %_x': 'Change %_USD',
    'Open_y': 'Open_GBP',
    'High_y': 'High_GBP',
    'Low_y': 'Low_GBP',
    'Vol._y': 'Vol._GBP',
    'Change %_y': 'Change %_GBP',
    'Open': 'Open_INR',
    'High': 'High_INR',
    'Low': 'Low_INR',
    'Vol.': 'Vol._INR',
    'Change %': 'Change %_INR'
}
ethereum_df.rename(columns=rename_dict1, inplace=True)
print(ethereum_df.head())
         Date  Price_USD  Open_USD  High_USD  Low_USD   Vol._USD  \
0  2024-03-01    3433.51   3339.25   3449.56  3339.24   431980.0   
1  2024-02-29    3339.26   3382.98   3520.80  3300.42   802170.0   
2  2024-02-28    3383.03   3242.44   3482.48  3183.92  1010000.0   
3  2024-02-27    3242.14   3175.94   3285.01  3165.91   674560.0   
4  2024-02-26    3175.94   3112.59   3193.67  3038.00   582390.0   

   Change %_USD  Price_GBP  Open_GBP  High_GBP  Low_GBP  Vol._GBP  \
0          2.82    2712.34   2645.76   2730.76  2643.83    1840.0   
1         -1.29    2646.04   2617.37   2772.57  2570.71    4620.0   
2          4.35    2616.93   2546.90   2748.91  2534.89    4350.0   
3          2.08    2546.90   2490.22   2581.24  2409.79    2890.0   
4          2.04    2493.37   2456.94   2515.03  2392.68    2450.0   

   Change %_GBP  Price_INR  Open_INR  High_INR   Low_INR  Vol._INR  \
0          2.51   284443.0  277151.0  285858.0  276932.0       NaN   
1          1.11   276986.0  280510.0  291857.0  273646.0       NaN   
2          2.75   280525.0  268963.0  288714.0  263953.0       NaN   
3          2.15   269047.0  263240.0  272341.0  262357.0       NaN   
4          1.48   263146.0  258124.0  264699.0  251787.0       NaN   

   Change %_INR  
0          2.69  
1         -1.26  
2          4.27  
3          2.24  
4          1.98  

Visualizations¶

In [18]:
correlation_matrix = bitcoin_df[['Price_GBP', 'Price_USD', 'Price_INR']].corr()

plt.figure(figsize=(6, 4))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Price Correlation Between Currencies for Bitcoin')
plt.show()

correlation_matrix1 = ethereum_df[['Price_GBP', 'Price_USD', 'Price_INR']].corr()

plt.figure(figsize=(6, 4))
sns.heatmap(correlation_matrix1, annot=True, cmap='coolwarm')
plt.title('Price Correlation Between Currencies for Ethereum')
plt.show()
In [19]:
import plotly.graph_objs as go

btc_trace = go.Scatter(
    x=bitcoin_df['Date'], 
    y=bitcoin_df['Price_USD'], 
    mode='lines',
    name='Bitcoin',
    hoverinfo='text', 
    text=[ 
        f'Date: {date}<br>USD: ${usd}<br>GBP: £{gbp}<br>INR:  ₹{inr}'
        for date, usd, gbp, inr in zip(bitcoin_df['Date'], bitcoin_df['Price_USD'], bitcoin_df['Price_GBP'], bitcoin_df['Price_INR'])
    ]
)

eth_trace = go.Scatter(
    x=ethereum_df['Date'], 
    y=ethereum_df['Price_USD'], 
    mode='lines',
    name='Ethereum',
    hoverinfo='text',
    text=[ 
        f'Date: {date}<br>USD: ${usd}<br>GBP: £{gbp}<br>INR:  ₹{inr}'
        for date, usd, gbp, inr in zip(ethereum_df['Date'], ethereum_df['Price_USD'], ethereum_df['Price_GBP'], ethereum_df['Price_INR'])
    ]
)
# Create a figure
fig = go.Figure()

# Add the Bitcoin and Ethereum trace
fig.add_trace(btc_trace)
fig.add_trace(eth_trace)

# Define updatemenu
updatemenu = dict(
    buttons=[
        dict(label="Bitcoin",
             method="update",
             args=[{"visible": [True, False, False]},
                   {"title": "Bitcoin Price Trend"}]),
        dict(label="Ethereum",
             method="update",
             args=[{"visible": [False, True, True]},
                   {"title": "Ethereum Price Trend"}])
    ],
    direction="down",
    pad={"r": 10, "t": 10},
    showactive=True,
    x=0.97, 
    xanchor="right", 
    y=1.15, 
    yanchor="top"
)

# Update the layout with the new updatemenu
fig.update_layout(updatemenus=[updatemenu])

fig.update_layout(
    title_text="Cryptocurrency Price Trend",
    xaxis_title='Date',
    yaxis_title='Price'
)

# Show the plot
fig.show()
In [20]:
# Convert 'Date' columns to datetime format
ethereum_df['Date'] = pd.to_datetime(ethereum_df['Date'])
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'])

# Merge datasets on 'Date' for a combined visualization
combined_df = pd.merge(ethereum_df[['Date', 'Price_USD', 'Vol._USD']], bitcoin_df[['Date', 'Price_USD', 'Vol._USD']], on='Date', suffixes=('_eth', '_btc'))

fig_log = make_subplots(specs=[[{"secondary_y": True}]])

# Ethereum volume bars
fig_log.add_trace(go.Bar(x=combined_df['Date'], y=combined_df['Vol._USD_eth'], name='Ethereum',
                         marker_color='green', opacity=0.9, legendgroup='Ethereum', showlegend=True), secondary_y=True)

# Ethereum price line
fig_log.add_trace(go.Scatter(x=combined_df['Date'], y=combined_df['Price_USD_eth'], mode='lines',
                             line=dict(color='green'), legendgroup='Ethereum', showlegend=False), secondary_y=False)

# Bitcoin volume bars
fig_log.add_trace(go.Bar(x=combined_df['Date'], y=combined_df['Vol._USD_btc'], name='Bitcoin',
                         marker_color='red', opacity=0.9, legendgroup='Bitcoin', showlegend=True), secondary_y=True)

# Bitcoin price line
fig_log.add_trace(go.Scatter(x=combined_df['Date'], y=combined_df['Price_USD_btc'], mode='lines',
                             line=dict(color='red'), legendgroup='Bitcoin', showlegend=False), secondary_y=False)

# Add figure title
fig_log.update_layout(title_text="Ethereum and Bitcoin: Price and Trading Volume Over Time (Log Scale for Volume)")

# Set x-axis title
fig_log.update_xaxes(title_text="Date")

# Set y-axes titles
fig_log.update_yaxes(title_text="<b>Price</b> (USD)", secondary_y=False)
fig_log.update_yaxes(title_text="<b>Volume</b> (USD) - Log Scale", secondary_y=True, type="log")

fig_log.show()
In [21]:
btc_candle_fig = go.Figure(data=[go.Candlestick(x=bitcoin_df['Date'],
                open=bitcoin_df['Open_USD'], high=bitcoin_df['High_USD'],
                low=bitcoin_df['Low_USD'], close=bitcoin_df['Price_USD'])])

btc_candle_fig.update_layout(title='Bitcoin Candlestick Chart in USD', xaxis_title='Date', yaxis_title='Price')
btc_candle_fig.show()

eth_candle_fig = go.Figure(data=[go.Candlestick(x=ethereum_df['Date'],
                open=ethereum_df['Open_USD'], high=ethereum_df['High_USD'],
                low=ethereum_df['Low_USD'], close=ethereum_df['Price_USD'])])

eth_candle_fig.update_layout(title='Ethereum Candlestick Chart in USD', xaxis_title='Date', yaxis_title='Price')
eth_candle_fig.show()

Data Modeling¶

1) How does the volatility of Bitcoin and Ethereum compare across INR, USD, and GBP, and what data analytics techniques can best capture and explain this volatility?¶

In [22]:
# Load US economic data
inflation_usa_df = pd.read_excel('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Interest Rate\\InflationRate_USA.xls')
interest_usa_df = pd.read_excel('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Interest Rate\\Interest_USA.xls')

# Displaying the first few rows of each dataset to understand their structure
print("Bitcoin Data:\n", bitcoin_df.head())
print("Ethereum Data:\n", ethereum_df.head())
print("USA Inflation Rate Data:\n", inflation_usa_df.head())
print("USA Interest Rate Data:\n", interest_usa_df.head())
Bitcoin Data:
         Date  Price_GBP  Open_GBP  High_GBP  Low_GBP  Vol._GBP  Change %_GBP  \
0 2024-03-01    49286.4   48428.9   49877.9  48087.1     290.0          1.77   
1 2024-02-29    48428.9   48291.2   49826.0  46392.2     550.0          0.34   
2 2024-02-28    48266.6   44822.7   50332.0  44600.0     720.0          7.72   
3 2024-02-27    44808.9   42720.5   45120.0  41954.2     360.0          4.90   
4 2024-02-26    42715.9   40829.1   43192.9  40129.8     270.0          4.62   

   Price_USD  Open_USD  High_USD  Low_USD  Vol._USD  Change %_USD  Price_INR  \
0    62346.0   61148.0   63000.0  60783.0    2510.0          1.96  5170624.0   
1    61148.0   62380.0   63566.0  60385.0    4100.0         -1.97  5073948.0   
2    62380.0   56998.0   63780.0  56668.0    6630.0          9.44  5176321.0   
3    56998.0   54417.0   57487.0  54388.0    4500.0          4.74  4731766.0   
4    54417.0   51716.0   54720.0  50903.0    4390.0          5.22  4514234.0   

    Open_INR   High_INR    Low_INR  Vol._INR  Change %_INR  
0  5071696.0  5227076.0  5036433.0       NaN          1.91  
1  5176916.0  5279115.0  5017673.0       NaN         -1.98  
2  4731160.0  5296428.0  4699779.0       NaN          9.40  
3  4513558.0  4768902.0  4512017.0       NaN          4.82  
4  4287393.0  4548782.0  4221497.0       NaN          5.28  
Ethereum Data:
         Date  Price_USD  Open_USD  High_USD  Low_USD   Vol._USD  Change %_USD  \
0 2024-03-01    3433.51   3339.25   3449.56  3339.24   431980.0          2.82   
1 2024-02-29    3339.26   3382.98   3520.80  3300.42   802170.0         -1.29   
2 2024-02-28    3383.03   3242.44   3482.48  3183.92  1010000.0          4.35   
3 2024-02-27    3242.14   3175.94   3285.01  3165.91   674560.0          2.08   
4 2024-02-26    3175.94   3112.59   3193.67  3038.00   582390.0          2.04   

   Price_GBP  Open_GBP  High_GBP  Low_GBP  Vol._GBP  Change %_GBP  Price_INR  \
0    2712.34   2645.76   2730.76  2643.83    1840.0          2.51   284443.0   
1    2646.04   2617.37   2772.57  2570.71    4620.0          1.11   276986.0   
2    2616.93   2546.90   2748.91  2534.89    4350.0          2.75   280525.0   
3    2546.90   2490.22   2581.24  2409.79    2890.0          2.15   269047.0   
4    2493.37   2456.94   2515.03  2392.68    2450.0          1.48   263146.0   

   Open_INR  High_INR   Low_INR  Vol._INR  Change %_INR  
0  277151.0  285858.0  276932.0       NaN          2.69  
1  280510.0  291857.0  273646.0       NaN         -1.26  
2  268963.0  288714.0  263953.0       NaN          4.27  
3  263240.0  272341.0  262357.0       NaN          2.24  
4  258124.0  264699.0  251787.0       NaN          1.98  
USA Inflation Rate Data:
         Date  inflation_rate
0 2021-03-01            2.40
1 2021-03-02            2.43
2 2021-03-03            2.45
3 2021-03-04            2.43
4 2021-03-05            2.43
USA Interest Rate Data:
         Date  interest_rate
0 2021-03-01           0.64
1 2021-03-02           0.60
2 2021-03-03           0.66
3 2021-03-04           0.70
4 2021-03-05           0.72
In [23]:
# Convert date columns to datetime format
inflation_usa_df['Date'] = pd.to_datetime(inflation_usa_df['Date'])
interest_usa_df['Date'] = pd.to_datetime(interest_usa_df['Date'])

# Set dates as indices
inflation_usa_df.set_index('Date', inplace=True)
interest_usa_df.set_index('Date', inplace=True)

# Merge US economic data into a single dataframe
usa_economic_data = pd.merge(inflation_usa_df, interest_usa_df, left_index=True, right_index=True)

# Convert cryptocurrency data dates to datetime format
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'])
ethereum_df['Date'] = pd.to_datetime(ethereum_df['Date'])

# Set cryptocurrency data dates as indices
bitcoin_df.set_index('Date', inplace=True)
ethereum_df.set_index('Date', inplace=True)

# Display combined USA economic data
print("Combined USA Economic Data:\n", usa_economic_data.head())
Combined USA Economic Data:
             inflation_rate  interest_rate
Date                                     
2021-03-01            2.40           0.64
2021-03-02            2.43           0.60
2021-03-03            2.45           0.66
2021-03-04            2.43           0.70
2021-03-05            2.43           0.72
In [24]:
usa_economic_data
Out[24]:
inflation_rate interest_rate
Date
2021-03-01 2.40 0.64
2021-03-02 2.43 0.60
2021-03-03 2.45 0.66
2021-03-04 2.43 0.70
2021-03-05 2.43 0.72
... ... ...
2024-02-26 2.36 -1.04
2024-02-27 2.38 -1.01
2024-02-28 2.38 -1.07
2024-02-29 2.40 -1.07
2024-03-01 2.40 -1.16

785 rows × 2 columns

In [25]:
# Calculate daily returns
bitcoin_df['BTC_Returns_USD'] = bitcoin_df['Price_USD'].pct_change()
ethereum_df['ETH_Returns_USD'] = ethereum_df['Price_USD'].pct_change()

# 100-day rolling volatility
bitcoin_df['BTC_Volatility_USD'] = bitcoin_df['BTC_Returns_USD'].rolling(window=100).std()
ethereum_df['ETH_Volatility_USD'] = ethereum_df['ETH_Returns_USD'].rolling(window=100).std()

# Display the cryptocurrency data with added volatility
print("Bitcoin Data with Volatility:\n", bitcoin_df.head())
print("Ethereum Data with Volatility:\n", ethereum_df.head())
Bitcoin Data with Volatility:
             Price_GBP  Open_GBP  High_GBP  Low_GBP  Vol._GBP  Change %_GBP  \
Date                                                                         
2024-03-01    49286.4   48428.9   49877.9  48087.1     290.0          1.77   
2024-02-29    48428.9   48291.2   49826.0  46392.2     550.0          0.34   
2024-02-28    48266.6   44822.7   50332.0  44600.0     720.0          7.72   
2024-02-27    44808.9   42720.5   45120.0  41954.2     360.0          4.90   
2024-02-26    42715.9   40829.1   43192.9  40129.8     270.0          4.62   

            Price_USD  Open_USD  High_USD  Low_USD  Vol._USD  Change %_USD  \
Date                                                                         
2024-03-01    62346.0   61148.0   63000.0  60783.0    2510.0          1.96   
2024-02-29    61148.0   62380.0   63566.0  60385.0    4100.0         -1.97   
2024-02-28    62380.0   56998.0   63780.0  56668.0    6630.0          9.44   
2024-02-27    56998.0   54417.0   57487.0  54388.0    4500.0          4.74   
2024-02-26    54417.0   51716.0   54720.0  50903.0    4390.0          5.22   

            Price_INR   Open_INR   High_INR    Low_INR  Vol._INR  \
Date                                                               
2024-03-01  5170624.0  5071696.0  5227076.0  5036433.0       NaN   
2024-02-29  5073948.0  5176916.0  5279115.0  5017673.0       NaN   
2024-02-28  5176321.0  4731160.0  5296428.0  4699779.0       NaN   
2024-02-27  4731766.0  4513558.0  4768902.0  4512017.0       NaN   
2024-02-26  4514234.0  4287393.0  4548782.0  4221497.0       NaN   

            Change %_INR  BTC_Returns_USD  BTC_Volatility_USD  
Date                                                           
2024-03-01          1.91              NaN                 NaN  
2024-02-29         -1.98        -0.019215                 NaN  
2024-02-28          9.40         0.020148                 NaN  
2024-02-27          4.82        -0.086278                 NaN  
2024-02-26          5.28        -0.045282                 NaN  
Ethereum Data with Volatility:
             Price_USD  Open_USD  High_USD  Low_USD   Vol._USD  Change %_USD  \
Date                                                                          
2024-03-01    3433.51   3339.25   3449.56  3339.24   431980.0          2.82   
2024-02-29    3339.26   3382.98   3520.80  3300.42   802170.0         -1.29   
2024-02-28    3383.03   3242.44   3482.48  3183.92  1010000.0          4.35   
2024-02-27    3242.14   3175.94   3285.01  3165.91   674560.0          2.08   
2024-02-26    3175.94   3112.59   3193.67  3038.00   582390.0          2.04   

            Price_GBP  Open_GBP  High_GBP  Low_GBP  Vol._GBP  Change %_GBP  \
Date                                                                         
2024-03-01    2712.34   2645.76   2730.76  2643.83    1840.0          2.51   
2024-02-29    2646.04   2617.37   2772.57  2570.71    4620.0          1.11   
2024-02-28    2616.93   2546.90   2748.91  2534.89    4350.0          2.75   
2024-02-27    2546.90   2490.22   2581.24  2409.79    2890.0          2.15   
2024-02-26    2493.37   2456.94   2515.03  2392.68    2450.0          1.48   

            Price_INR  Open_INR  High_INR   Low_INR  Vol._INR  Change %_INR  \
Date                                                                          
2024-03-01   284443.0  277151.0  285858.0  276932.0       NaN          2.69   
2024-02-29   276986.0  280510.0  291857.0  273646.0       NaN         -1.26   
2024-02-28   280525.0  268963.0  288714.0  263953.0       NaN          4.27   
2024-02-27   269047.0  263240.0  272341.0  262357.0       NaN          2.24   
2024-02-26   263146.0  258124.0  264699.0  251787.0       NaN          1.98   

            ETH_Returns_USD  ETH_Volatility_USD  
Date                                             
2024-03-01              NaN                 NaN  
2024-02-29        -0.027450                 NaN  
2024-02-28         0.013108                 NaN  
2024-02-27        -0.041646                 NaN  
2024-02-26        -0.020419                 NaN  
In [26]:
# Merge Bitcoin and Ethereum data with US economic data
btc_usa_data = pd.merge(bitcoin_df, usa_economic_data, left_index=True, right_index=True)
eth_usa_data = pd.merge(ethereum_df, usa_economic_data, left_index=True, right_index=True)

# Display the merged datasets
print("Bitcoin & US Data:\n", btc_usa_data.head())
print("Ethereum & US Data:\n", eth_usa_data.head())
Bitcoin & US Data:
             Price_GBP  Open_GBP  High_GBP  Low_GBP  Vol._GBP  Change %_GBP  \
Date                                                                         
2024-03-01    49286.4   48428.9   49877.9  48087.1     290.0          1.77   
2024-02-29    48428.9   48291.2   49826.0  46392.2     550.0          0.34   
2024-02-28    48266.6   44822.7   50332.0  44600.0     720.0          7.72   
2024-02-27    44808.9   42720.5   45120.0  41954.2     360.0          4.90   
2024-02-26    42715.9   40829.1   43192.9  40129.8     270.0          4.62   

            Price_USD  Open_USD  High_USD  Low_USD  ...  Price_INR   Open_INR  \
Date                                                ...                         
2024-03-01    62346.0   61148.0   63000.0  60783.0  ...  5170624.0  5071696.0   
2024-02-29    61148.0   62380.0   63566.0  60385.0  ...  5073948.0  5176916.0   
2024-02-28    62380.0   56998.0   63780.0  56668.0  ...  5176321.0  4731160.0   
2024-02-27    56998.0   54417.0   57487.0  54388.0  ...  4731766.0  4513558.0   
2024-02-26    54417.0   51716.0   54720.0  50903.0  ...  4514234.0  4287393.0   

             High_INR    Low_INR  Vol._INR  Change %_INR  BTC_Returns_USD  \
Date                                                                        
2024-03-01  5227076.0  5036433.0       NaN          1.91              NaN   
2024-02-29  5279115.0  5017673.0       NaN         -1.98        -0.019215   
2024-02-28  5296428.0  4699779.0       NaN          9.40         0.020148   
2024-02-27  4768902.0  4512017.0       NaN          4.82        -0.086278   
2024-02-26  4548782.0  4221497.0       NaN          5.28        -0.045282   

            BTC_Volatility_USD  inflation_rate  interest_rate  
Date                                                           
2024-03-01                 NaN            2.40          -1.16  
2024-02-29                 NaN            2.40          -1.07  
2024-02-28                 NaN            2.38          -1.07  
2024-02-27                 NaN            2.38          -1.01  
2024-02-26                 NaN            2.36          -1.04  

[5 rows x 22 columns]
Ethereum & US Data:
             Price_USD  Open_USD  High_USD  Low_USD   Vol._USD  Change %_USD  \
Date                                                                          
2024-03-01    3433.51   3339.25   3449.56  3339.24   431980.0          2.82   
2024-02-29    3339.26   3382.98   3520.80  3300.42   802170.0         -1.29   
2024-02-28    3383.03   3242.44   3482.48  3183.92  1010000.0          4.35   
2024-02-27    3242.14   3175.94   3285.01  3165.91   674560.0          2.08   
2024-02-26    3175.94   3112.59   3193.67  3038.00   582390.0          2.04   

            Price_GBP  Open_GBP  High_GBP  Low_GBP  ...  Price_INR  Open_INR  \
Date                                                ...                        
2024-03-01    2712.34   2645.76   2730.76  2643.83  ...   284443.0  277151.0   
2024-02-29    2646.04   2617.37   2772.57  2570.71  ...   276986.0  280510.0   
2024-02-28    2616.93   2546.90   2748.91  2534.89  ...   280525.0  268963.0   
2024-02-27    2546.90   2490.22   2581.24  2409.79  ...   269047.0  263240.0   
2024-02-26    2493.37   2456.94   2515.03  2392.68  ...   263146.0  258124.0   

            High_INR   Low_INR  Vol._INR  Change %_INR  ETH_Returns_USD  \
Date                                                                      
2024-03-01  285858.0  276932.0       NaN          2.69              NaN   
2024-02-29  291857.0  273646.0       NaN         -1.26        -0.027450   
2024-02-28  288714.0  263953.0       NaN          4.27         0.013108   
2024-02-27  272341.0  262357.0       NaN          2.24        -0.041646   
2024-02-26  264699.0  251787.0       NaN          1.98        -0.020419   

            ETH_Volatility_USD  inflation_rate  interest_rate  
Date                                                           
2024-03-01                 NaN            2.40          -1.16  
2024-02-29                 NaN            2.40          -1.07  
2024-02-28                 NaN            2.38          -1.07  
2024-02-27                 NaN            2.38          -1.01  
2024-02-26                 NaN            2.36          -1.04  

[5 rows x 22 columns]
In [27]:
print(btc_usa_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_USD']].isnull().sum())
print(eth_usa_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_USD']].isnull().sum())
interest_rate         31
inflation_rate        31
BTC_Volatility_USD    72
dtype: int64
interest_rate         31
inflation_rate        31
ETH_Volatility_USD    72
dtype: int64
In [28]:
btc_usa_data = btc_usa_data.dropna(subset=['interest_rate', 'inflation_rate', 'BTC_Volatility_USD'])
eth_usa_data = eth_usa_data.dropna(subset=['interest_rate', 'inflation_rate', 'ETH_Volatility_USD'])
print(btc_usa_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_USD']].isnull().sum())
print(eth_usa_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_USD']].isnull().sum())
interest_rate         0
inflation_rate        0
BTC_Volatility_USD    0
dtype: int64
interest_rate         0
inflation_rate        0
ETH_Volatility_USD    0
dtype: int64
In [29]:
print(btc_usa_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_USD']].describe())
print(eth_usa_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_USD']].describe())
       interest_rate  inflation_rate  BTC_Volatility_USD
count     687.000000      687.000000          687.000000
mean        0.433857        2.556696            0.031123
std         1.110229        0.325399            0.008458
min        -1.790000        2.070000            0.015961
25%        -0.705000        2.290000            0.024272
50%         0.740000        2.510000            0.032284
75%         1.170000        2.720000            0.036521
max         2.780000        3.590000            0.049140
       interest_rate  inflation_rate  ETH_Volatility_USD
count     687.000000      687.000000          687.000000
mean        0.433857        2.556696            0.039776
std         1.110229        0.325399            0.014885
min        -1.790000        2.070000            0.016178
25%        -0.705000        2.290000            0.026996
50%         0.740000        2.510000            0.041584
75%         1.170000        2.720000            0.047648
max         2.780000        3.590000            0.074506
In [30]:
btc_usa_data['BTC_Volatility_USD'] = btc_usa_data['BTC_Volatility_USD'].clip(upper=1)  # Adjust limit as needed
eth_usa_data['ETH_Volatility_USD'] = eth_usa_data['ETH_Volatility_USD'].clip(upper=1)  # Adjust limit as needed
In [31]:
# Correlation Matrix
btc_corr_usa = btc_usa_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_USD']].corr()

# Regression Analysis
X_btc = btc_usa_data[['interest_rate', 'inflation_rate']]
y_btc = btc_usa_data['BTC_Volatility_USD']

reg_btc = LinearRegression().fit(X_btc, y_btc)
btc_pred = reg_btc.predict(X_btc)

btc_r2 = r2_score(y_btc, btc_pred)

# Output the correlation matrix and R2 score
print("Bitcoin Correlation Matrix:\n", btc_corr_usa)
print(f"Bitcoin R2: {btc_r2}")
Bitcoin Correlation Matrix:
                     interest_rate  inflation_rate  BTC_Volatility_USD
interest_rate            1.000000        0.848611            0.757436
inflation_rate           0.848611        1.000000            0.621265
BTC_Volatility_USD       0.757436        0.621265            1.000000
Bitcoin R2: 0.5753622028334557
In [32]:
# Correlation Matrix for Ethereum
eth_corr_usa = eth_usa_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_USD']].corr()

# Regression Analysis for Ethereum
X_eth = eth_usa_data[['interest_rate', 'inflation_rate']]
y_eth = eth_usa_data['ETH_Volatility_USD']

# Create a linear regression model
reg_eth = LinearRegression().fit(X_eth, y_eth)

# Make predictions using the regression model
eth_pred = reg_eth.predict(X_eth)

# Calculate the R2 score to measure the accuracy of the model
eth_r2 = r2_score(y_eth, eth_pred)

# Output the correlation matrix and R2 score
print("Ethereum Correlation Matrix:\n", eth_corr_usa)
print(f"Ethereum R2: {eth_r2}")
Ethereum Correlation Matrix:
                     interest_rate  inflation_rate  ETH_Volatility_USD
interest_rate            1.000000        0.848611            0.745792
inflation_rate           0.848611        1.000000            0.566951
ETH_Volatility_USD       0.745792        0.566951            1.000000
Ethereum R2: 0.5717404048555825
In [33]:
# Visualization function for correlation matrix
def visualize_correlation_matrix(corr_matrix, title):
    plt.figure(figsize=(8, 6))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title(title)
    plt.show()

# Visualizing the correlation matrices
visualize_correlation_matrix(btc_corr_usa, "Bitcoin Correlation Matrix (USD)")
visualize_correlation_matrix(eth_corr_usa, "Ethereum Correlation Matrix (USD)")
In [34]:
date_range = pd.date_range(start='2021-03-01', end='2024-03-01', freq='D')

# Add date column to the cryptocurrency datasets
bitcoin_df['Date'] = date_range[:len(bitcoin_df)]
ethereum_df['Date'] = date_range[:len(ethereum_df)]

# Convert to datetime and set as indices
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'])
ethereum_df['Date'] = pd.to_datetime(ethereum_df['Date'])

bitcoin_df.set_index('Date', inplace=True)
ethereum_df.set_index('Date', inplace=True)
In [35]:
# Load UK economic data
inflation_uk_df = pd.read_excel('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Interest Rate\\InflationRate_UK.xls')
interest_uk_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\Interest Rate\\Interest_UK.csv')

# Convert date columns to datetime format
inflation_uk_df['Date'] = pd.to_datetime(inflation_uk_df['Date'])
interest_uk_df['Date'] = pd.to_datetime(interest_uk_df['Date'])

# Set dates as indices
inflation_uk_df.set_index('Date', inplace=True)
interest_uk_df.set_index('Date', inplace=True)

# Combine UK economic data into a single dataframe
uk_economic_data = pd.merge(inflation_uk_df, interest_uk_df, left_index=True, right_index=True)

# Display the combined UK economic data
print("UK Economic Data:\n", uk_economic_data.head())
UK Economic Data:
             inflation_rate  interest_rate
Date                                     
2021-03-01             1.0            0.1
2021-03-02             1.0            0.1
2021-03-03             1.0            0.1
2021-03-04             1.0            0.1
2021-03-05             1.0            0.1
In [36]:
uk_economic_data
Out[36]:
inflation_rate interest_rate
Date
2021-03-01 1.0 0.10
2021-03-02 1.0 0.10
2021-03-03 1.0 0.10
2021-03-04 1.0 0.10
2021-03-05 1.0 0.10
... ... ...
2024-02-26 3.8 5.25
2024-02-27 3.8 5.25
2024-02-28 3.8 5.25
2024-02-29 3.8 5.25
2024-03-01 3.8 5.25

785 rows × 2 columns

In [37]:
# Calculate daily returns
bitcoin_df['BTC_Returns_GBP'] = bitcoin_df['Price_GBP'].pct_change()
ethereum_df['ETH_Returns_GBP'] = ethereum_df['Price_GBP'].pct_change()

# 100-day rolling volatility
bitcoin_df['BTC_Volatility_GBP'] = bitcoin_df['BTC_Returns_GBP'].rolling(window=100).std()
ethereum_df['ETH_Volatility_GBP'] = ethereum_df['ETH_Returns_GBP'].rolling(window=100).std()

# Display the cryptocurrency data with added volatility
print("Bitcoin Data with Volatility:\n", bitcoin_df.head())
print("Ethereum Data with Volatility:\n", ethereum_df.head())
Bitcoin Data with Volatility:
             Price_GBP  Open_GBP  High_GBP  Low_GBP  Vol._GBP  Change %_GBP  \
Date                                                                         
2021-03-01    49286.4   48428.9   49877.9  48087.1     290.0          1.77   
2021-03-02    48428.9   48291.2   49826.0  46392.2     550.0          0.34   
2021-03-03    48266.6   44822.7   50332.0  44600.0     720.0          7.72   
2021-03-04    44808.9   42720.5   45120.0  41954.2     360.0          4.90   
2021-03-05    42715.9   40829.1   43192.9  40129.8     270.0          4.62   

            Price_USD  Open_USD  High_USD  Low_USD  ...  Price_INR   Open_INR  \
Date                                                ...                         
2021-03-01    62346.0   61148.0   63000.0  60783.0  ...  5170624.0  5071696.0   
2021-03-02    61148.0   62380.0   63566.0  60385.0  ...  5073948.0  5176916.0   
2021-03-03    62380.0   56998.0   63780.0  56668.0  ...  5176321.0  4731160.0   
2021-03-04    56998.0   54417.0   57487.0  54388.0  ...  4731766.0  4513558.0   
2021-03-05    54417.0   51716.0   54720.0  50903.0  ...  4514234.0  4287393.0   

             High_INR    Low_INR  Vol._INR  Change %_INR  BTC_Returns_USD  \
Date                                                                        
2021-03-01  5227076.0  5036433.0       NaN          1.91              NaN   
2021-03-02  5279115.0  5017673.0       NaN         -1.98        -0.019215   
2021-03-03  5296428.0  4699779.0       NaN          9.40         0.020148   
2021-03-04  4768902.0  4512017.0       NaN          4.82        -0.086278   
2021-03-05  4548782.0  4221497.0       NaN          5.28        -0.045282   

            BTC_Volatility_USD  BTC_Returns_GBP  BTC_Volatility_GBP  
Date                                                                 
2021-03-01                 NaN              NaN                 NaN  
2021-03-02                 NaN        -0.017398                 NaN  
2021-03-03                 NaN        -0.003351                 NaN  
2021-03-04                 NaN        -0.071638                 NaN  
2021-03-05                 NaN        -0.046709                 NaN  

[5 rows x 22 columns]
Ethereum Data with Volatility:
             Price_USD  Open_USD  High_USD  Low_USD   Vol._USD  Change %_USD  \
Date                                                                          
2021-03-01    3433.51   3339.25   3449.56  3339.24   431980.0          2.82   
2021-03-02    3339.26   3382.98   3520.80  3300.42   802170.0         -1.29   
2021-03-03    3383.03   3242.44   3482.48  3183.92  1010000.0          4.35   
2021-03-04    3242.14   3175.94   3285.01  3165.91   674560.0          2.08   
2021-03-05    3175.94   3112.59   3193.67  3038.00   582390.0          2.04   

            Price_GBP  Open_GBP  High_GBP  Low_GBP  ...  Price_INR  Open_INR  \
Date                                                ...                        
2021-03-01    2712.34   2645.76   2730.76  2643.83  ...   284443.0  277151.0   
2021-03-02    2646.04   2617.37   2772.57  2570.71  ...   276986.0  280510.0   
2021-03-03    2616.93   2546.90   2748.91  2534.89  ...   280525.0  268963.0   
2021-03-04    2546.90   2490.22   2581.24  2409.79  ...   269047.0  263240.0   
2021-03-05    2493.37   2456.94   2515.03  2392.68  ...   263146.0  258124.0   

            High_INR   Low_INR  Vol._INR  Change %_INR  ETH_Returns_USD  \
Date                                                                      
2021-03-01  285858.0  276932.0       NaN          2.69              NaN   
2021-03-02  291857.0  273646.0       NaN         -1.26        -0.027450   
2021-03-03  288714.0  263953.0       NaN          4.27         0.013108   
2021-03-04  272341.0  262357.0       NaN          2.24        -0.041646   
2021-03-05  264699.0  251787.0       NaN          1.98        -0.020419   

            ETH_Volatility_USD  ETH_Returns_GBP  ETH_Volatility_GBP  
Date                                                                 
2021-03-01                 NaN              NaN                 NaN  
2021-03-02                 NaN        -0.024444                 NaN  
2021-03-03                 NaN        -0.011001                 NaN  
2021-03-04                 NaN        -0.026760                 NaN  
2021-03-05                 NaN        -0.021018                 NaN  

[5 rows x 22 columns]
In [38]:
# Merge Bitcoin and Ethereum data with US economic data
btc_uk_data = pd.merge(bitcoin_df, uk_economic_data, left_index=True, right_index=True)
eth_uk_data = pd.merge(ethereum_df, uk_economic_data, left_index=True, right_index=True)

# Display the merged datasets
print("Bitcoin & UK Data:\n", btc_uk_data.head())
print("Ethereum & UK Data:\n", eth_uk_data.head())
Bitcoin & UK Data:
             Price_GBP  Open_GBP  High_GBP  Low_GBP  Vol._GBP  Change %_GBP  \
Date                                                                         
2021-03-01    49286.4   48428.9   49877.9  48087.1     290.0          1.77   
2021-03-02    48428.9   48291.2   49826.0  46392.2     550.0          0.34   
2021-03-03    48266.6   44822.7   50332.0  44600.0     720.0          7.72   
2021-03-04    44808.9   42720.5   45120.0  41954.2     360.0          4.90   
2021-03-05    42715.9   40829.1   43192.9  40129.8     270.0          4.62   

            Price_USD  Open_USD  High_USD  Low_USD  ...   High_INR    Low_INR  \
Date                                                ...                         
2021-03-01    62346.0   61148.0   63000.0  60783.0  ...  5227076.0  5036433.0   
2021-03-02    61148.0   62380.0   63566.0  60385.0  ...  5279115.0  5017673.0   
2021-03-03    62380.0   56998.0   63780.0  56668.0  ...  5296428.0  4699779.0   
2021-03-04    56998.0   54417.0   57487.0  54388.0  ...  4768902.0  4512017.0   
2021-03-05    54417.0   51716.0   54720.0  50903.0  ...  4548782.0  4221497.0   

            Vol._INR  Change %_INR  BTC_Returns_USD  BTC_Volatility_USD  \
Date                                                                      
2021-03-01       NaN          1.91              NaN                 NaN   
2021-03-02       NaN         -1.98        -0.019215                 NaN   
2021-03-03       NaN          9.40         0.020148                 NaN   
2021-03-04       NaN          4.82        -0.086278                 NaN   
2021-03-05       NaN          5.28        -0.045282                 NaN   

            BTC_Returns_GBP  BTC_Volatility_GBP  inflation_rate  interest_rate  
Date                                                                            
2021-03-01              NaN                 NaN             1.0            0.1  
2021-03-02        -0.017398                 NaN             1.0            0.1  
2021-03-03        -0.003351                 NaN             1.0            0.1  
2021-03-04        -0.071638                 NaN             1.0            0.1  
2021-03-05        -0.046709                 NaN             1.0            0.1  

[5 rows x 24 columns]
Ethereum & UK Data:
             Price_USD  Open_USD  High_USD  Low_USD   Vol._USD  Change %_USD  \
Date                                                                          
2021-03-01    3433.51   3339.25   3449.56  3339.24   431980.0          2.82   
2021-03-02    3339.26   3382.98   3520.80  3300.42   802170.0         -1.29   
2021-03-03    3383.03   3242.44   3482.48  3183.92  1010000.0          4.35   
2021-03-04    3242.14   3175.94   3285.01  3165.91   674560.0          2.08   
2021-03-05    3175.94   3112.59   3193.67  3038.00   582390.0          2.04   

            Price_GBP  Open_GBP  High_GBP  Low_GBP  ...  High_INR   Low_INR  \
Date                                                ...                       
2021-03-01    2712.34   2645.76   2730.76  2643.83  ...  285858.0  276932.0   
2021-03-02    2646.04   2617.37   2772.57  2570.71  ...  291857.0  273646.0   
2021-03-03    2616.93   2546.90   2748.91  2534.89  ...  288714.0  263953.0   
2021-03-04    2546.90   2490.22   2581.24  2409.79  ...  272341.0  262357.0   
2021-03-05    2493.37   2456.94   2515.03  2392.68  ...  264699.0  251787.0   

            Vol._INR  Change %_INR  ETH_Returns_USD  ETH_Volatility_USD  \
Date                                                                      
2021-03-01       NaN          2.69              NaN                 NaN   
2021-03-02       NaN         -1.26        -0.027450                 NaN   
2021-03-03       NaN          4.27         0.013108                 NaN   
2021-03-04       NaN          2.24        -0.041646                 NaN   
2021-03-05       NaN          1.98        -0.020419                 NaN   

            ETH_Returns_GBP  ETH_Volatility_GBP  inflation_rate  interest_rate  
Date                                                                            
2021-03-01              NaN                 NaN             1.0            0.1  
2021-03-02        -0.024444                 NaN             1.0            0.1  
2021-03-03        -0.011001                 NaN             1.0            0.1  
2021-03-04        -0.026760                 NaN             1.0            0.1  
2021-03-05        -0.021018                 NaN             1.0            0.1  

[5 rows x 24 columns]
In [39]:
print(btc_uk_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP']].isnull().sum())
print(eth_uk_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP']].isnull().sum())
interest_rate          0
inflation_rate         0
BTC_Volatility_GBP    72
dtype: int64
interest_rate          0
inflation_rate         0
ETH_Volatility_GBP    72
dtype: int64
In [40]:
btc_uk_data = btc_uk_data.dropna(subset=['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP'])
eth_uk_data = eth_uk_data.dropna(subset=['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP'])
print(btc_uk_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP']].isnull().sum())
print(eth_uk_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP']].isnull().sum())
interest_rate         0
inflation_rate        0
BTC_Volatility_GBP    0
dtype: int64
interest_rate         0
inflation_rate        0
ETH_Volatility_GBP    0
dtype: int64
In [41]:
print(btc_uk_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP']].describe())
print(eth_uk_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP']].describe())
       interest_rate  inflation_rate  BTC_Volatility_GBP
count     713.000000      713.000000          713.000000
mean        2.608555        6.340112            0.030181
std         2.063923        2.274132            0.008185
min         0.100000        2.100000            0.015654
25%         0.500000        4.200000            0.024544
50%         2.250000        6.300000            0.029860
75%         5.000000        8.600000            0.034887
max         5.250000        9.600000            0.048875
       interest_rate  inflation_rate  ETH_Volatility_GBP
count     713.000000      713.000000          713.000000
mean        2.608555        6.340112            0.038634
std         2.063923        2.274132            0.014402
min         0.100000        2.100000            0.015750
25%         0.500000        4.200000            0.026773
50%         2.250000        6.300000            0.039481
75%         5.000000        8.600000            0.045408
max         5.250000        9.600000            0.073705
In [42]:
btc_uk_data['BTC_Volatility_GBP'] = btc_uk_data['BTC_Volatility_GBP'].clip(upper=1)  # Adjust limit as needed
eth_uk_data['ETH_Volatility_GBP'] = eth_uk_data['ETH_Volatility_GBP'].clip(upper=1)  # Adjust limit as needed
In [43]:
# Correlation Matrix
btc_corr_uk = btc_uk_data[['interest_rate', 'inflation_rate', 'BTC_Volatility_GBP']].corr()

# Regression Analysis
X_btc = btc_uk_data[['interest_rate', 'inflation_rate']]
y_btc = btc_uk_data['BTC_Volatility_GBP']

reg_btc = LinearRegression().fit(X_btc, y_btc)
btc_pred = reg_btc.predict(X_btc)

btc_r2 = r2_score(y_btc, btc_pred)

# Output the correlation matrix and R2 score
print("Bitcoin Correlation Matrix:\n", btc_corr_uk)
print(f"Bitcoin R2: {btc_r2}")
Bitcoin Correlation Matrix:
                     interest_rate  inflation_rate  BTC_Volatility_GBP
interest_rate            1.000000        0.203298            0.856792
inflation_rate           0.203298        1.000000            0.165234
BTC_Volatility_GBP       0.856792        0.165234            1.000000
Bitcoin R2: 0.7341760223211335
In [44]:
# Correlation Matrix for Ethereum
eth_corr_uk = eth_uk_data[['interest_rate', 'inflation_rate', 'ETH_Volatility_GBP']].corr()

# Prepare independent variables (X) and dependent variable (y)
X_eth = eth_uk_data[['interest_rate', 'inflation_rate']]
y_eth = eth_uk_data['ETH_Volatility_USD']

# Create a linear regression model
reg_eth = LinearRegression().fit(X_eth, y_eth)

# Make predictions using the regression model
eth_pred = reg_eth.predict(X_eth)

# Calculate the R2 score to measure the accuracy of the model
eth_r2 = r2_score(y_eth, eth_pred)

# Output the correlation matrix and R2 score
print("Ethereum Correlation Matrix:\n", eth_corr_uk)
print(f"Ethereum R2: {eth_r2}")
Ethereum Correlation Matrix:
                     interest_rate  inflation_rate  ETH_Volatility_GBP
interest_rate            1.000000        0.203298            0.785631
inflation_rate           0.203298        1.000000            0.221015
ETH_Volatility_GBP       0.785631        0.221015            1.000000
Ethereum R2: 0.6250908661470469
In [45]:
# Visualization function for correlation matrix
def visualize_correlation_matrix(corr_matrix, title):
    plt.figure(figsize=(8, 6))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title(title)
    plt.show()

# Visualizing the correlation matrices
visualize_correlation_matrix(btc_corr_uk, "Bitcoin Correlation Matrix (UK)")
visualize_correlation_matrix(eth_corr_uk, "Ethereum Correlation Matrix (UK)")

2) Can machine learning models accurately detect and forecast patterns or cycles in  the price movements of Bitcoin and Ethereum, providing valuable guidance for  investors and analysts in the cryptocurrency space?​¶

In [46]:
# Defining a function to prepare data and run the model
def run_linear_model(dataframe, feature_col, target_col, test_size=0.2, random_state=42):
    # Prepare the data
    data_clean = dataframe.dropna(subset=[feature_col, target_col]).copy()
    X = data_clean[[feature_col]]
    y = data_clean[target_col]

    # Splitting the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # Creating and training the model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predicting and evaluating
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Results
    return mse, r2

# Adding 30-day rolling averages to both Bitcoin and Ethereum datasets
bitcoin_df['30d_rolling_avg_usd'] = bitcoin_df['Price_USD'].rolling(window=30, min_periods=1).mean()
bitcoin_df['30d_rolling_avg_gbp'] = bitcoin_df['Price_GBP'].rolling(window=30, min_periods=1).mean()
ethereum_df['30d_rolling_avg_usd'] = ethereum_df['Price_USD'].rolling(window=30, min_periods=1).mean()
ethereum_df['30d_rolling_avg_gbp'] = ethereum_df['Price_GBP'].rolling(window=30, min_periods=1).mean()

# Running models for Bitcoin and Ethereum in USD and GBP
mse_btc_usd, r2_btc_usd = run_linear_model(bitcoin_df, '30d_rolling_avg_usd', 'Price_USD')
mse_btc_gbp, r2_btc_gbp = run_linear_model(bitcoin_df, '30d_rolling_avg_gbp', 'Price_GBP')
mse_eth_usd, r2_eth_usd = run_linear_model(ethereum_df, '30d_rolling_avg_usd', 'Price_USD')
mse_eth_gbp, r2_eth_gbp = run_linear_model(ethereum_df, '30d_rolling_avg_gbp', 'Price_GBP')

print("Linear Regression - Results")
print("Bitcoin USD - MSE:", mse_btc_usd, "R^2 Score:", r2_btc_usd)
print("Bitcoin GBP - MSE:", mse_btc_gbp, "R^2 Score:", r2_btc_gbp)
print("Ethereum USD - MSE:", mse_eth_usd, "R^2 Score:", r2_eth_usd)
print("Ethereum GBP - MSE:", mse_eth_gbp, "R^2 Score:", r2_eth_gbp)
Linear Regression - Results
Bitcoin USD - MSE: 13721513.737440929 R^2 Score: 0.9135289380688452
Bitcoin GBP - MSE: 7615990.113462368 R^2 Score: 0.8981790657290526
Ethereum USD - MSE: 91914.67379741861 R^2 Score: 0.8691018986344661
Ethereum GBP - MSE: 50048.292438045515 R^2 Score: 0.8517974779901151
In [47]:
def run_random_forest_model(dataframe, feature_cols, target_col, random_state=0):
    # Re-loading and preparing the data
    dataframe['30d_rolling_avg'] = dataframe[target_col].rolling(window=30, min_periods=1).mean()
    dataframe['30d_volatility'] = dataframe[target_col].rolling(window=30, min_periods=1).std()
    
    # Filling any remaining NaN values with the mean of the column
    dataframe.fillna(dataframe.mean(), inplace=True)
    
    # Selecting features and re-standardizing
    scaler = StandardScaler()
    X = dataframe[feature_cols]
    X_scaled = scaler.fit_transform(X)
    
    y = dataframe[target_col]
    
    # Splitting the data
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=random_state)
    
    # Fitting the Random Forest Model
    rf_model = RandomForestRegressor(n_estimators=100, random_state=random_state)
    rf_model.fit(X_train, y_train)
    
    # Predicting and evaluating
    y_pred_rf = rf_model.predict(X_test)
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Return MSE and R² score
    return mse_rf, r2_rf

# Define the feature columns common to all model runs
feature_cols = ['30d_rolling_avg', '30d_volatility', 'Vol._USD']

mse_btc_usd_rf, r2_btc_usd_rf = run_random_forest_model(bitcoin_df, feature_cols, 'Price_USD')
mse_btc_gbp_rf, r2_btc_gbp_rf = run_random_forest_model(bitcoin_df, feature_cols, 'Price_GBP')
mse_eth_usd_rf, r2_eth_usd_rf = run_random_forest_model(ethereum_df, feature_cols, 'Price_USD')
mse_eth_gbp_rf, r2_eth_gbp_rf = run_random_forest_model(ethereum_df, feature_cols, 'Price_GBP')

print("Random Forest - Results")
print("Bitcoin USD - RF MSE:", mse_btc_usd_rf, "RF R2 Score:", r2_btc_usd_rf)
print("Bitcoin GBP - RF MSE:", mse_btc_gbp_rf, "RF R2 Score:", r2_btc_gbp_rf)
print("Ethereum USD - RF MSE:", mse_eth_usd_rf, "RF R2 Score:", r2_eth_usd_rf)
print("Ethereum GBP - RF MSE:", mse_eth_gbp_rf, "RF R2 Score:", r2_eth_gbp_rf)
Random Forest - Results
Bitcoin USD - RF MSE: 9751803.281279217 RF R2 Score: 0.9454346077877172
Bitcoin GBP - RF MSE: 5342113.424420669 RF R2 Score: 0.9359301270113953
Ethereum USD - RF MSE: 72238.21106403082 RF R2 Score: 0.9065374373570333
Ethereum GBP - RF MSE: 47663.14455619989 RF R2 Score: 0.8707663248926976

3) Are there identifiable patterns or cycles in the price movements of Bitcoin and Ethereum that can be predicted using machine learning models?¶

In [48]:
# Load the datasets
bitcoin_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_bitcoin_dataset.csv')
ethereum_df = pd.read_csv('C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\AIT 582\\Project Proposal\\merged_ethereum_dataset.csv')

# Convert 'Date' column to datetime format for both datasets
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'])
ethereum_df['Date'] = pd.to_datetime(ethereum_df['Date'])
In [49]:
def apply_fourier_transform(df, price_column):
    # Number of sample points
    N = df.shape[0]
    # Sample spacing
    T = 1.0  # assuming daily data, the sample spacing is 1 day
    x = df[price_column]
    yf = np.fft.fft(x)
    xf = np.fft.fftfreq(N, T)[:N//2]

    plt.figure(figsize=(14, 7))
    plt.plot(xf, 2.0/N * np.abs(yf[0:N//2]))
    plt.grid()
    plt.title(f'Fourier Transform - Frequency Components of {price_column}')
    plt.xlabel('Frequency (1/day)')
    plt.ylabel('Amplitude')
    plt.show()

# Apply Fourier Transform to Bitcoin and Ethereum price data
apply_fourier_transform(bitcoin_df, 'Price')
apply_fourier_transform(ethereum_df, 'Price')
In [50]:
def prepare_data(df, price_column, sequence_length):
    data = df[[price_column]].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    data_normalized = scaler.fit_transform(data)

    x, y = [], []
    for i in range(sequence_length, len(data_normalized)):
        x.append(data_normalized[i-sequence_length:i])
        y.append(data_normalized[i, 0])
    x, y = np.array(x), np.array(y)
    
    # Split into train and test sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, shuffle=False)
    
    return x_train, y_train, x_test, y_test, scaler

# Prepare data
sequence_length = 50
x_train_btc, y_train_btc, x_test_btc, y_test_btc, scaler_btc = prepare_data(bitcoin_df, 'Price', sequence_length)
x_train_eth, y_train_eth, x_test_eth, y_test_eth, scaler_eth = prepare_data(ethereum_df, 'Price', sequence_length)

def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(input_shape[0], input_shape[1])),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Input shape correction
model_btc = build_lstm_model((x_train_btc.shape[1], x_train_btc.shape[2]))
model_eth = build_lstm_model((x_train_eth.shape[1], x_train_eth.shape[2]))

# Training the model
history_btc = model_btc.fit(x_train_btc, y_train_btc, epochs=50, batch_size=32, validation_data=(x_test_btc, y_test_btc), verbose=1)
history_eth = model_eth.fit(x_train_eth, y_train_eth, epochs=50, batch_size=32, validation_data=(x_test_eth, y_test_eth), verbose=1)
C:\Users\apoor_b31k2hq\AppData\Roaming\Python\Python39\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning:

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.

Epoch 1/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 8s 102ms/step - loss: 0.0459 - val_loss: 0.0068
Epoch 2/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 27ms/step - loss: 0.0056 - val_loss: 0.0062
Epoch 3/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0038 - val_loss: 0.0063
Epoch 4/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 77ms/step - loss: 0.0035 - val_loss: 0.0056
Epoch 5/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0041 - val_loss: 0.0056
Epoch 6/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 74ms/step - loss: 0.0034 - val_loss: 0.0051
Epoch 7/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 69ms/step - loss: 0.0031 - val_loss: 0.0049
Epoch 8/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 66ms/step - loss: 0.0030 - val_loss: 0.0050
Epoch 9/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - loss: 0.0033 - val_loss: 0.0045
Epoch 10/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0032 - val_loss: 0.0066
Epoch 11/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 54ms/step - loss: 0.0028 - val_loss: 0.0060
Epoch 12/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 67ms/step - loss: 0.0030 - val_loss: 0.0040
Epoch 13/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 45ms/step - loss: 0.0025 - val_loss: 0.0040
Epoch 14/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0031 - val_loss: 0.0042
Epoch 15/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 63ms/step - loss: 0.0026 - val_loss: 0.0041
Epoch 16/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 66ms/step - loss: 0.0025 - val_loss: 0.0036
Epoch 17/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 72ms/step - loss: 0.0025 - val_loss: 0.0036
Epoch 18/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 72ms/step - loss: 0.0021 - val_loss: 0.0054
Epoch 19/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 59ms/step - loss: 0.0030 - val_loss: 0.0033
Epoch 20/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 97ms/step - loss: 0.0021 - val_loss: 0.0038
Epoch 21/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 78ms/step - loss: 0.0024 - val_loss: 0.0042
Epoch 22/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 46ms/step - loss: 0.0023 - val_loss: 0.0032
Epoch 23/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0022 - val_loss: 0.0032
Epoch 24/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 65ms/step - loss: 0.0021 - val_loss: 0.0031
Epoch 25/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - loss: 0.0018 - val_loss: 0.0031
Epoch 26/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0022 - val_loss: 0.0031
Epoch 27/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 55ms/step - loss: 0.0020 - val_loss: 0.0043
Epoch 28/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 74ms/step - loss: 0.0027 - val_loss: 0.0029
Epoch 29/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 70ms/step - loss: 0.0021 - val_loss: 0.0029
Epoch 30/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 68ms/step - loss: 0.0023 - val_loss: 0.0031
Epoch 31/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 80ms/step - loss: 0.0021 - val_loss: 0.0039
Epoch 32/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 78ms/step - loss: 0.0024 - val_loss: 0.0029
Epoch 33/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 82ms/step - loss: 0.0018 - val_loss: 0.0037
Epoch 34/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 45ms/step - loss: 0.0019 - val_loss: 0.0026
Epoch 35/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0019 - val_loss: 0.0047
Epoch 36/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 58ms/step - loss: 0.0020 - val_loss: 0.0026
Epoch 37/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 73ms/step - loss: 0.0018 - val_loss: 0.0027
Epoch 38/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 85ms/step - loss: 0.0018 - val_loss: 0.0024
Epoch 39/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 85ms/step - loss: 0.0016 - val_loss: 0.0024
Epoch 40/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 75ms/step - loss: 0.0016 - val_loss: 0.0024
Epoch 41/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 77ms/step - loss: 0.0017 - val_loss: 0.0028
Epoch 42/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 88ms/step - loss: 0.0020 - val_loss: 0.0032
Epoch 43/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 79ms/step - loss: 0.0020 - val_loss: 0.0027
Epoch 44/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 97ms/step - loss: 0.0018 - val_loss: 0.0023
Epoch 45/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 82ms/step - loss: 0.0014 - val_loss: 0.0024
Epoch 46/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 87ms/step - loss: 0.0022 - val_loss: 0.0026
Epoch 47/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 83ms/step - loss: 0.0017 - val_loss: 0.0022
Epoch 48/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 66ms/step - loss: 0.0017 - val_loss: 0.0032
Epoch 49/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0021 - val_loss: 0.0023
Epoch 50/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - loss: 0.0019 - val_loss: 0.0022
Epoch 1/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 7s 106ms/step - loss: 0.0706 - val_loss: 0.0119
Epoch 2/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 78ms/step - loss: 0.0066 - val_loss: 0.0120
Epoch 3/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 77ms/step - loss: 0.0052 - val_loss: 0.0103
Epoch 4/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 41ms/step - loss: 0.0046 - val_loss: 0.0119
Epoch 5/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0054 - val_loss: 0.0095
Epoch 6/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 55ms/step - loss: 0.0036 - val_loss: 0.0091
Epoch 7/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 41ms/step - loss: 0.0036 - val_loss: 0.0084
Epoch 8/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0037 - val_loss: 0.0093
Epoch 9/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0038 - val_loss: 0.0079
Epoch 10/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0035 - val_loss: 0.0078
Epoch 11/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 73ms/step - loss: 0.0031 - val_loss: 0.0075
Epoch 12/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 42ms/step - loss: 0.0030 - val_loss: 0.0074
Epoch 13/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0031 - val_loss: 0.0073
Epoch 14/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 56ms/step - loss: 0.0038 - val_loss: 0.0077
Epoch 15/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 79ms/step - loss: 0.0027 - val_loss: 0.0066
Epoch 16/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 80ms/step - loss: 0.0030 - val_loss: 0.0065
Epoch 17/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 84ms/step - loss: 0.0032 - val_loss: 0.0064
Epoch 18/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 81ms/step - loss: 0.0026 - val_loss: 0.0061
Epoch 19/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 68ms/step - loss: 0.0030 - val_loss: 0.0064
Epoch 20/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 63ms/step - loss: 0.0028 - val_loss: 0.0060
Epoch 21/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 64ms/step - loss: 0.0029 - val_loss: 0.0057
Epoch 22/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 64ms/step - loss: 0.0025 - val_loss: 0.0056
Epoch 23/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 45ms/step - loss: 0.0026 - val_loss: 0.0054
Epoch 24/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0024 - val_loss: 0.0052
Epoch 25/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0028 - val_loss: 0.0053
Epoch 26/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 65ms/step - loss: 0.0031 - val_loss: 0.0052
Epoch 27/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 70ms/step - loss: 0.0030 - val_loss: 0.0049
Epoch 28/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 70ms/step - loss: 0.0026 - val_loss: 0.0046
Epoch 29/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0024 - val_loss: 0.0053
Epoch 30/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 68ms/step - loss: 0.0025 - val_loss: 0.0046
Epoch 31/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 75ms/step - loss: 0.0026 - val_loss: 0.0043
Epoch 32/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 41ms/step - loss: 0.0023 - val_loss: 0.0043
Epoch 33/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0022 - val_loss: 0.0043
Epoch 34/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 55ms/step - loss: 0.0021 - val_loss: 0.0047
Epoch 35/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 65ms/step - loss: 0.0025 - val_loss: 0.0040
Epoch 36/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 84ms/step - loss: 0.0023 - val_loss: 0.0043
Epoch 37/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 71ms/step - loss: 0.0023 - val_loss: 0.0045
Epoch 38/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 74ms/step - loss: 0.0027 - val_loss: 0.0040
Epoch 39/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 80ms/step - loss: 0.0023 - val_loss: 0.0038
Epoch 40/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 73ms/step - loss: 0.0021 - val_loss: 0.0037
Epoch 41/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 41ms/step - loss: 0.0021 - val_loss: 0.0037
Epoch 42/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0022 - val_loss: 0.0036
Epoch 43/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0020 - val_loss: 0.0037
Epoch 44/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - loss: 0.0020 - val_loss: 0.0035
Epoch 45/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0023 - val_loss: 0.0036
Epoch 46/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 58ms/step - loss: 0.0019 - val_loss: 0.0035
Epoch 47/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 80ms/step - loss: 0.0021 - val_loss: 0.0037
Epoch 48/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 3s 79ms/step - loss: 0.0019 - val_loss: 0.0035
Epoch 49/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 83ms/step - loss: 0.0019 - val_loss: 0.0033
Epoch 50/50
27/27 ━━━━━━━━━━━━━━━━━━━━ 2s 72ms/step - loss: 0.0015 - val_loss: 0.0036
In [51]:
def evaluate_model(model, x_test, y_test, scaler):
    # Predict the prices using the model
    predicted_prices = model.predict(x_test)
    # Inverse transform to get actual prices
    predicted_prices = scaler.inverse_transform(predicted_prices)
    actual_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

    # Calculate the evaluation metrics
    mse = mean_squared_error(actual_prices, predicted_prices)
    mae = mean_absolute_error(actual_prices, predicted_prices)
    rmse = np.sqrt(mse)

    return predicted_prices, actual_prices, mse, mae, rmse

# Evaluate Bitcoin model
predicted_btc, actual_btc, mse_btc, mae_btc, rmse_btc = evaluate_model(model_btc, x_test_btc, y_test_btc, scaler_btc)
# Evaluate Ethereum model
predicted_eth, actual_eth, mse_eth, mae_eth, rmse_eth = evaluate_model(model_eth, x_test_eth, y_test_eth, scaler_eth)

print(f"Bitcoin Model - MSE: {mse_btc}, MAE: {mae_btc}, RMSE: {rmse_btc}")
print(f"Ethereum Model - MSE: {mse_eth}, MAE: {mae_eth}, RMSE: {rmse_eth}")
7/7 ━━━━━━━━━━━━━━━━━━━━ 1s 91ms/step
7/7 ━━━━━━━━━━━━━━━━━━━━ 1s 101ms/step
Bitcoin Model - MSE: 32778201485.554764, MAE: 146029.65714285715, RMSE: 181047.51168009674
Ethereum Model - MSE: 280453667.4000361, MAE: 12771.882961309524, RMSE: 16746.75095055862
In [52]:
def plot_interactive_predictions(actual, predicted, title):
    fig = go.Figure()
    # Add actual price trace
    fig.add_trace(go.Scatter(x=list(range(len(actual))), y=actual.flatten(), mode='lines', name='Actual Price'))
    # Add predicted price trace
    fig.add_trace(go.Scatter(x=list(range(len(predicted))), y=predicted.flatten(), mode='lines', name='Predicted Price'))
    
    # Update layout
    fig.update_layout(title=title, xaxis_title='Time', yaxis_title='Price',
                      legend_title="Legend", template="plotly_white")
    fig.show()

# Plot Bitcoin and Ethereum predictions interactively
plot_interactive_predictions(actual_btc, predicted_btc, 'Interactive Bitcoin Price Prediction')
plot_interactive_predictions(actual_eth, predicted_eth, 'Interactive Ethereum Price Prediction')