We provide quantitative data insights on various asset classes

Instrumental Analysis

Equity Space

SnP - ESc1
Dow - YMc1

Bonds Space

Bund - FGBLc1
10 Year t-note - ZNc1
BTP - FBTMc1
FOAT - FOATc1
UK Gilts - Not Available
US 10 year - TYc1
CGB - CGBc1
Australia 10 Year Bill - YTCc1

Metals Space

Gold - GCv1
Silver - SIv1
Copper - HGv1

Energy Space

RBOB - RBc1
GO - LGOc1
BRENT - LCOc1
WTI - CLc1
Brent - WTI - Not Available

In [34]:
import warnings
warnings.filterwarnings('ignore')
In [35]:
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
pandas2ri.activate()
In [143]:
from datetime import datetime
readRDS = robjects.r['readRDS']
#df = readRDS('CLc1.rds')

df = readRDS('ESc1.rds')

#df = pandas2ri.ri2py(df)
# do something with the dataframe
df_min=df
In [144]:
%matplotlib inline
#df=df.set_index('Timestamp')
#df.index=df.index.tz_localize(None)
df_min=df
print(df.head())
df['Last'].plot()
df=df.dropna()

print(df.info())

import seaborn as sns
sns.heatmap(df.isnull(), cbar=False)
                     Timestamp     Open     High      Low     Last  Volume
4261 2010-01-04 04:30:00+05:30  1113.75  1114.75  1113.25  1114.25    3420
4262 2010-01-04 04:31:00+05:30  1114.25  1115.25  1114.25  1114.75     437
4263 2010-01-04 04:32:00+05:30  1114.75  1114.75  1114.25  1114.50     596
4264 2010-01-04 04:33:00+05:30  1114.25  1114.75  1114.25  1114.50     131
4265 2010-01-04 04:34:00+05:30  1114.50  1114.50  1114.25  1114.25     166
<class 'pandas.core.frame.DataFrame'>
Index: 3603769 entries, 4261 to 5499351
Data columns (total 6 columns):
 #   Column     Dtype                        
---  ------     -----                        
 0   Timestamp  datetime64[ns, Asia/Calcutta]
 1   Open       float64                      
 2   High       float64                      
 3   Low        float64                      
 4   Last       float64                      
 5   Volume     int32                        
dtypes: datetime64[ns, Asia/Calcutta](1), float64(4), int32(1)
memory usage: 178.7+ MB
None
Out[144]:
<matplotlib.axes._subplots.AxesSubplot at 0x21c3bf25358>
In [145]:
import matplotlib.pyplot as plt
%matplotlib inline
df=df.set_index('Timestamp')

df.index=df.index.tz_localize(None)
df_min=df
print(df.head())
plt.title("Close price of SnP")
df['Last'].plot()
plt.show()
                        Open     High      Low     Last  Volume
Timestamp                                                      
2010-01-04 04:30:00  1113.75  1114.75  1113.25  1114.25    3420
2010-01-04 04:31:00  1114.25  1115.25  1114.25  1114.75     437
2010-01-04 04:32:00  1114.75  1114.75  1114.25  1114.50     596
2010-01-04 04:33:00  1114.25  1114.75  1114.25  1114.50     131
2010-01-04 04:34:00  1114.50  1114.50  1114.25  1114.25     166

Resampling the minute data into daily data frame

In [147]:
ohlc_dict = {                                                                                                             
'Open':'first',                                                                                                    
'High':'max',                                                                                                       
'Low':'min',                                                                                                        
'Last': 'last',                                                                                                    
'Volume': 'sum'
}
df_daily=df.resample('1D').apply(ohlc_dict)
In [148]:
df_daily=df_daily.dropna()
print(df_daily.info())

import seaborn as sns
sns.heatmap(df_daily.isnull(), cbar=False)
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3196 entries, 2010-01-04 to 2020-06-16
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    3196 non-null   float64
 1   High    3196 non-null   float64
 2   Low     3196 non-null   float64
 3   Last    3196 non-null   float64
 4   Volume  3196 non-null   int32  
dtypes: float64(4), int32(1)
memory usage: 137.3 KB
None
Out[148]:
<matplotlib.axes._subplots.AxesSubplot at 0x21d0209f438>
In [ ]:
 
In [149]:
#df_daily['2014-05':'2015-05']['High'].plot()
In [150]:
df_daily['2014-05-31':'2015-05-04']
Out[150]:
Open High Low Last Volume
Timestamp
2014-05-31 1919.50 1922.25 1917.25 1919.00 450153
2014-06-02 1921.25 1924.25 1913.75 1922.75 826186
2014-06-03 1923.00 1924.00 1916.00 1922.25 943347
2014-06-04 1922.50 1927.00 1916.00 1925.50 903640
2014-06-05 1925.25 1940.75 1921.00 1939.50 1581620
... ... ... ... ... ...
2015-04-29 2105.50 2112.50 2090.50 2103.50 1643376
2015-04-30 2103.50 2107.75 2081.75 2082.00 1638867
2015-05-01 2082.00 2101.75 2070.25 2101.75 1483892
2015-05-02 2101.75 2102.50 2096.75 2102.25 293680
2015-05-04 2101.75 2115.00 2098.00 2111.25 757973

282 rows × 5 columns

In [151]:
print(len(df['2014-06-30':'2014-07-01']))
2527
In [152]:
df['2014-05-31':'2014-06-01']
Out[152]:
Open High Low Last Volume
Timestamp
2014-05-31 00:00:00 1919.50 1919.50 1919.00 1919.00 829
2014-05-31 00:01:00 1919.00 1919.25 1918.75 1918.75 714
2014-05-31 00:02:00 1918.75 1919.00 1918.50 1918.50 2967
2014-05-31 00:03:00 1918.50 1918.75 1918.25 1918.25 657
2014-05-31 00:04:00 1918.25 1919.00 1918.25 1918.50 1650
... ... ... ... ... ...
2014-05-31 02:41:00 1919.50 1919.50 1919.25 1919.25 302
2014-05-31 02:42:00 1919.25 1919.25 1919.00 1919.00 74
2014-05-31 02:43:00 1919.00 1919.25 1919.00 1919.25 253
2014-05-31 02:44:00 1919.00 1919.25 1919.00 1919.00 787
2014-05-31 02:45:00 1919.00 1919.00 1919.00 1919.00 1

151 rows × 5 columns

In [153]:
df_daily['Close']=df_daily['Last']
In [154]:
df=df_daily
print('The total no of days is',len(df))
The total no of days is 3196

Data Parameters Calculation

Here we extend our data frame by adding different analytical parameters such as returns,o-h ranging ,previous_close-high,gap-up/gap-down etc

In [156]:
import numpy as np
df['Returns']=((df['Close']-df['Open'])/df['Close'])*100

#df['Returns']=

#df['Returns']=((df['Close']-df['Close'].shift(1))/df['Close'].shift(1))*100

df['Prev_Close']=df['Close'].shift(1)

df['Log_Returns']=np.log((df['Close']/df['Prev_Close']))*100



df['PrevClose-Current_High']=((df['High']-df['Prev_Close'])/df['Prev_Close'])*100

df['Gap-up/down']=((df['Open']-df['Prev_Close'])/df['Prev_Close'])*100

df['Max_Swings']=((df['High']-df['Low'])/df['High'])*100
df['O-H']=((df['High']-df['Open'])/df['Open'])*100


#print(df.head())
df=df.dropna()
print(df.head())
               Open     High      Low     Last   Volume    Close  Returns  \
Timestamp                                                                   
2010-01-06  1127.75  1134.50  1126.00  1133.25  1280237  1133.25    0.485   
2010-01-07  1133.00  1136.00  1127.00  1135.00  1405602  1135.00    0.176   
2010-01-08  1134.75  1139.50  1131.00  1137.25  1656934  1137.25    0.220   
2010-01-09  1137.25  1141.75  1135.75  1141.50   478334  1141.50    0.372   
2010-01-11  1141.75  1148.00  1137.75  1139.75   977578  1139.75   -0.175   

            Prev_Close  Log_Returns  PrevClose-Current_High  Gap-up/down  \
Timestamp                                                                  
2010-01-06     1127.75        0.487                   0.599        0.000   
2010-01-07     1133.25        0.154                   0.243       -0.022   
2010-01-08     1135.00        0.198                   0.396       -0.022   
2010-01-09     1137.25        0.373                   0.396        0.000   
2010-01-11     1141.50       -0.153                   0.569        0.022   

            Max_Swings    O-H  
Timestamp                      
2010-01-06       0.749  0.599  
2010-01-07       0.792  0.265  
2010-01-08       0.746  0.419  
2010-01-09       0.526  0.396  
2010-01-11       0.893  0.547  

Statistical Measures

The following tables gives us insight about mean,median,standard deviation and the upper and lower quartile ranges

In [157]:
import pandas as pd
pd.set_option('precision', 3)
df.describe()
Out[157]:
Open High Low Last Volume Close Returns Prev_Close Log_Returns PrevClose-Current_High Gap-up/down Max_Swings O-H
count 3194.000 3194.000 3194.000 3194.000 3.194e+03 3194.000 3194.000 3194.000 3194.000 3194.000 3194.000 3194.000 3194.000
mean 2004.140 2015.489 1992.149 2005.098 1.311e+06 2005.098 0.043 2004.482 0.032 0.565 -0.015 1.180 0.581
std 617.445 620.510 614.151 617.756 7.822e+05 617.756 0.897 617.648 0.922 0.644 0.241 0.976 0.644
min 1011.750 1018.500 1002.750 1012.000 1.200e+01 1012.000 -7.671 1012.000 -8.300 -2.413 -9.192 0.000 0.000
25% 1399.062 1404.188 1390.312 1399.250 7.094e+05 1399.250 -0.274 1399.250 -0.295 0.171 -0.009 0.588 0.177
50% 2007.625 2022.375 1997.125 2008.250 1.265e+06 2008.250 0.075 2008.000 0.069 0.397 0.000 0.924 0.404
75% 2536.250 2552.938 2510.875 2536.438 1.745e+06 2536.438 0.435 2534.000 0.434 0.749 0.008 1.470 0.747
max 3390.500 3397.500 3382.000 3390.500 6.679e+06 3390.500 5.886 3390.500 6.077 7.866 1.716 11.766 7.854
In [185]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
sns.set(color_codes=True)
In [186]:
plt.figure(figsize=(12,8))
sns.distplot(df['Log_Returns'],bins=20)
Out[186]:
<matplotlib.axes._subplots.AxesSubplot at 0x21c43176be0>
In [187]:
df['year']=df.index.year
df['month']=df.index.month
df['day']=df.index.day
df['week']=df.index.week
df=df.dropna()

print(min(df['Returns']))
import plotly.express as px

fig = px.scatter(df, x="day", y="Returns", animation_frame="year", animation_group="day",
           size="Volume", color="month", hover_name="year", facet_col="month",
           log_x=False, size_max=45, range_x=[0,35], range_y=[-50,10], width=1800, height=400)
fig.show()
-7.671398369072787
In [199]:
fig = px.histogram(df, x='Log_Returns',nbins=20,title='Logarithmic Returns Distribution')
fig.show()
In [200]:
import plotly.express as px
fig = px.histogram(df, x="Max_Swings",nbins=50,title='Max Swing Distribution')
fig.show()
sns.distplot(df['Max_Swings'],bins=20)
Out[200]:
<matplotlib.axes._subplots.AxesSubplot at 0x21c3b2d22b0>
In [205]:
#a=df[df['Returns']>-30]
#df2=a

#print(df2)
#fig = px.histogram(df, x="Returns",nbins=50,histnorm='probability')
#fig.show()
#fig = px.histogram(df, x="R",nbins=100)
#fig = px.histogram(df, x="Returns",nbins=50)
#fig.show()
#plt.figure(figsize=(12,8))
#sns.distplot(df['Returns'],bins=40,color='g')
#from scipy.stats import norm
#plt.figure(figsize=(12,8))
#plt.hist(df['Returns'], bins=40, color='g');
#plt.plot(df['Returns'], norm.pdf(df['Returns']),hist)
plt.figure(figsize=(12,8))
sns.distplot(df['Returns'],bins=40,color='g')
plt.gca().set(title='Returns Distribution', ylabel='Frequency',xlabel="Returns");
plt.show()
In [207]:
fig = px.histogram(df, x="O-H",nbins=50,title="Open High Range Distribution",histnorm='probability')
fig.show()
#from scipy.stats import norm
#plt.figure(figsize=(12,8))
#plt.hist(df['O-H'], bins=40, color='g');
##plt.plot(df['Returns'], norm.pdf(df['Returns']),hist)
#plt.gca().set(title='Returns Distribution', ylabel='Frequency',xlabel="Open-High Range");
#plt.xlim(-6,6,0.5)
#plt.xticks()
#plt.show()

#plt.figure(figsize=(12,8))
#sns.distplot(df['O-H'],bins=40,color='g')
#plt.gca().set(title='Open High Ranging', ylabel='Frequency',xlabel="Returns");
#plt.show()
In [ ]:
 
In [192]:
s=df[df['O-H']>4]
print(len(s))
probab_greater_than_3=(len(s)/len(df_daily))*100

probab_greater_than_3
16
Out[192]:
0.5006257822277848

Probability of price going higher than 4% from open is about 0.5%. So you can co-relate probablity with risk and find suitable exits or trail your trade accordingly.

In [209]:
fig = px.histogram(df, x="PrevClose-Current_High",nbins=50,title="Previous Close to Current Day High")
fig.show()

#plt.figure(figsize=(12,8))
#sns.distplot(df['PrevClose-Current_High'],bins=40,color='g',kde=False,hist=True)
#plt.gca().set(title='PrevClose-Current_High Range');
#plt.show()
In [194]:
#fig = px.scatter(df,y="Gap-up/down",title="Gap Up/Gap Down Distribution")
#fig.show()



#df1=pd.DataFrame()

#df_1=df[df["Gap-up/down"]>-2]
#print(df["Gap-up/down"].count())
#print(df_1)
#plt.figure(figsize=(16,10))
#sns.scatterplot(df["Gap-up/down"])

#sns.scatterplot(y=df["Gap-up/down"],x=df["Returns"],color='g')
#plt.xlim(df.index[0],df.index[-1])

#plt.xlim(df.index[0],df.index[-1])
#plt.gca().set(title="Gap-up/down distribution");
#plt.ylim(0,1)
#plt.scatter(df["Gap-up/down"])
#plt.plot(df["Returns"])
#plt.legend()

#plt.plot(df["Returns"]/max(abs(df["Returns"])))
#plt.plot(df["Gap-up/down"]/max(abs(df["Gap-up/down"])))
#sns.pairplot(data=df)
#plt.plot()
#plt.show()

#import scipy
#a=scipy.stats.pearsonr(df["Returns"],df["Max_Swings"] )
#print(a)
#plt.hist(df["Gap-up/down"])
In [218]:
plt.figure(figsize=(12,8))
df['weekday']=df.index.dayofweek
df_weekday=df.groupby('weekday').mean()

#df_weekday['Volume'].plot()
#a['Close'].plot()
#fig = px.scatter(df_weekday,y="Volume",title="Volume Distribution per day")
sns.set(color_codes=True)
sns.scatterplot(x=df_weekday.index,y=df_weekday["Volume"],color='r',marker='^',s=500)
plt.show()
#fig.show()

Remark:

As international market is open from Monday to Saturday, we can see tha a significantly reduced amount is traded on Monday and lowest on Saturday. Tuesday-Friday volume is on similar notes.

In [97]:
import pandas as pd
import matplotlib.pyplot as plt
df.index=pd.to_datetime(df.index)
prices = df['Close']
returns = prices.pct_change()
import monthly_returns_heatmap as mrh
plt.figure(figsize=(20,10))
#returns.plot_monthly_returns_heatmap()
mrh.plot(returns) # <== or using direct call

This monthly returns gives us the seasonality of the intrument which is useful in Options Trading.

In [196]:
import seaborn as sns
sns.set(style="ticks")


#a=df[df['Returns']>-30]
#df2=a[a['Max_Swings']<50]

#rs = np.random.RandomState(11)
x = df['Returns']
y = df['Max_Swings']

#sns.jointplot(x, y, kind="hex", color="#4CB391")

sns.jointplot(x=df['Returns'], y= df['Max_Swings'], data=df);
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [201]:
df['year']=df.index.year
df['month']=df.index.month
df['day']=df.index.day
df['week']=df.index.week
df=df.dropna()
import plotly.express as px

fig = px.scatter(df, x="day", y="Returns", animation_frame="year", animation_group="day",
           size="Volume", color="month", hover_name="year", facet_col="month",
           log_x=False, size_max=45, range_x=[0,35], range_y=[-10,10], width=1800, height=400)
fig.show()
In [139]:
df2=df_min
df_min['Hour']=((df_min.index).hour)
In [ ]:
 
In [118]:
df_min['date']=df_min.index.date
In [119]:
high_daily=df_min.groupby("date").agg({"High":max})
high_daily.reset_index(inplace=True)

print(high_daily.head())
         date     High
0  2010-01-04  1129.75
1  2010-01-05  1132.75
2  2010-01-06  1134.50
3  2010-01-07  1136.00
4  2010-01-08  1139.50
In [120]:
from tqdm import tqdm
high_list=[]
for i in tqdm(range(len(high_daily))):
    a=df_min[(df_min['High']==high_daily['High'][i]) & (df_min['date']==high_daily['date'][i])]
    t=a["Hour"][0]
    #print(int(t))
    high_list.append(t)
   
100%|██████████████████████████████████████████████████████████████████████████████| 3196/3196 [23:13<00:00,  2.29it/s]
In [128]:
#fig = px.histogram(high_list,x=high_list,title='Day High Occurence')
#fig.show()
plt.figure(figsize=(12,8))
sns.distplot(high_list,bins=20,color='g')
plt.gca().set(title='Day High Time analysis', ylabel='Probability ');
plt.show()

As we can see the majority of the time ,the day high is either in the 1st hr and a last few hrs,so this gives us an idea about as to at what time we can expect the movement and swing at the either side.

In [124]:
from collections import Counter
Counter(high_list)
Out[124]:
Counter({22: 192,
         21: 207,
         23: 452,
         12: 69,
         2: 188,
         14: 74,
         20: 249,
         3: 83,
         11: 32,
         1: 318,
         0: 507,
         16: 55,
         10: 22,
         18: 101,
         17: 66,
         13: 62,
         7: 38,
         19: 235,
         4: 72,
         15: 51,
         9: 16,
         6: 41,
         5: 46,
         8: 20})
In [121]:
low_daily=df_min.groupby("date").agg({"Low":min})
low_daily.reset_index(inplace=True)

print(low_daily.head())
         date      Low
0  2010-01-04  1113.25
1  2010-01-05  1125.00
2  2010-01-06  1126.00
3  2010-01-07  1127.00
4  2010-01-08  1131.00
In [34]:
 
In [122]:
from tqdm import tqdm
low_list=[]
for i in tqdm(range(len(low_daily))):
    a=df_min[(df_min['Low']==low_daily['Low'][i]) & (df_min['date']==low_daily['date'][i])]
    t=a["Hour"][0]
    #print(int(t))
    low_list.append(t)
   
100%|██████████████████████████████████████████████████████████████████████████████| 3196/3196 [23:09<00:00,  2.30it/s]
In [125]:
from collections import Counter
Counter(low_list)
Out[125]:
Counter({4: 78,
         20: 264,
         0: 686,
         15: 47,
         19: 208,
         23: 326,
         2: 141,
         1: 344,
         5: 58,
         18: 76,
         21: 189,
         22: 195,
         10: 22,
         11: 32,
         12: 46,
         13: 65,
         17: 40,
         14: 68,
         16: 46,
         3: 118,
         6: 58,
         9: 23,
         7: 46,
         8: 20})
In [ ]:
 
In [127]:
#fig = px.histogram(low_list,x=low_list)
#fig.show()
plt.figure(figsize=(12,8))
sns.distplot(low_list,bins=20,color='g')
plt.gca().set(title='Day low Time analysis', ylabel='Probability ');
plt.show()
In [129]:
df_min.describe()
Out[129]:
Open High Low Last Volume Hour
count 3.604e+06 3.604e+06 3.604e+06 3.604e+06 3.604e+06 3.604e+06
mean 2.004e+03 2.004e+03 2.003e+03 2.004e+03 1.163e+03 1.200e+01
std 6.178e+02 6.179e+02 6.177e+02 6.178e+02 2.609e+03 6.776e+00
min 1.004e+03 1.004e+03 1.003e+03 1.004e+03 1.000e+00 0.000e+00
25% 1.396e+03 1.396e+03 1.396e+03 1.396e+03 5.800e+01 6.000e+00
50% 2.010e+03 2.010e+03 2.010e+03 2.010e+03 2.410e+02 1.200e+01
75% 2.536e+03 2.537e+03 2.536e+03 2.536e+03 1.142e+03 1.800e+01
max 3.397e+03 3.398e+03 3.396e+03 3.397e+03 2.370e+05 2.300e+01
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [175]:
#import plotly.io as pio
#pio.renderers.default='notebook'
In [ ]:
 
In [ ]:
 
In [171]:
#import plotly.io as pio
#pio.renderers.default = "notebook_connected"
In [197]:
import plotly.offline as pyo
import plotly.graph_objs as go
# Set notebook mode to work in offline
pyo.init_notebook_mode()
In [ ]: