1.3 金融数据可视化
跳转到根目录:知行合一:投资篇
已完成:
1、投资&技术
  1.1.1 投资-编程基础-numpy
  1.1.2 投资-编程基础-pandas
  1.2 金融数据处理
  1.3 金融数据可视化
2、投资方法论
  2.1.1 预期年化收益率
3、投资实证
  [3.1 2023这一年] 被鸽
1. 金融数据可视化
1.1. matplotlib
1.1.1. 沪深300走势图
注意:
- 数据,是使用我公开放在gitee上的csv文件,直接读取就能用。(ssl要加import,否则报错。)
- 中文,要加配置参数mpl
- mac和windows字体不同,mac可以用Arial Unicode MS,windows用SimHei
点评:
- 生成的图,没有动态交互效果。
- 如果只是想看趋势,这是很方便的方法,如果要细致查看图表细节,那最好用其他绘图包。
import ssl

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fonts that can render the Chinese labels. matplotlib uses the first family
# in this list that is installed, so one setting covers both macOS
# ("Arial Unicode MS") and Windows ("SimHei").
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei"]

# Without this override, reading the CSV over HTTPS failed for the author with
# "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ... unable to get local issuer certificate".
# NOTE(review): this turns off certificate verification for the default HTTPS
# context of the whole process.
ssl._create_default_https_context = ssl._create_unverified_context

# Load the data; the "date" column is parsed as datetime and used as the index.
sh300 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/510300.csv", parse_dates=['date'], index_col='date')

# Plot the closing price over the whole history.
sh300['close'].plot(figsize=(12,6))
plt.title('沪深300ETF走势图')
plt.xlabel('日期')
plt.show()
1.1.2. 日线+均线图
很多股票软件,都能看到比如5日均线,自己画也很容易。
import ssl

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fonts that can render the Chinese labels; the first installed family wins
# ("Arial Unicode MS" on macOS, "SimHei" on Windows).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei"]
# Render the minus sign with the ASCII hyphen so it displays with these fonts.
mpl.rcParams['axes.unicode_minus'] = False

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...".
# NOTE(review): disables HTTPS certificate verification process-wide.
ssl._create_default_https_context = ssl._create_unverified_context

sh300 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/510300.csv", parse_dates=['date'], index_col='date')

# Rolling means of the closing price for each window length in ma_day.
ma_day = [5, 20, 52, 252]
ma_columns = []
for ma in ma_day:
    column_name = f"{ma}日均线"
    sh300[column_name] = sh300["close"].rolling(ma).mean()
    ma_columns.append(column_name)

# Plot the close and every moving average from 2023-01-01 onwards. The column
# list is derived from ma_day so the two cannot drift apart.
sh300.loc['2023-01-01':, ["close", *ma_columns]].plot(figsize=(12,6))
plt.title('沪深300走势+均线图')
plt.xlabel('日期')
plt.show()
1.1.3. 收益率与风险
我们使用年化收益率和标准差,来横向比较各标的的收益率和风险。
数据还是直接从qstock获取,进行统一加工。
按照年来分组计算收益率。
import ssl

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import qstock as qs

# Fonts that can render the Chinese labels; the first installed family wins
# ("Arial Unicode MS" on macOS, "SimHei" on Windows).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei"]
# Render the minus sign with the ASCII hyphen so it displays with these fonts.
mpl.rcParams['axes.unicode_minus'] = False

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...".
# NOTE(review): disables HTTPS certificate verification process-wide.
ssl._create_default_https_context = ssl._create_unverified_context

# ETFs to compare.
stocks_info = [
    {'code': '510300', 'name': '沪深300'},
    {'code': '510500', 'name': '中证500'},
    {'code': '512010', 'name': '医药ETF'},
    {'code': '512000', 'name': '券商ETF'},
    {'code': '516160', 'name': '新能源ETF'},
    {'code': '510800', 'name': '红利ETF'},
    {'code': '518880', 'name': '黄金ETF'},
    {'code': '512200', 'name': '房地产ETF'},
]

# Fetch the history of every ETF from qstock and keep it under 'history_df'.
for stock in stocks_info:
    stock['history_df'] = qs.get_data(stock['code'])

# Build one frame with a close-price column per ETF (column name = ETF name).
# rename() returns a new frame, so the original history is never modified
# through a slice (the inplace rename used before triggers
# SettingWithCopyWarning).
df_all = pd.DataFrame()
for stock in stocks_info:
    df = stock['history_df'][['close']].rename(columns={'close': stock['name']})
    if df_all.size == 0:
        df_all = df
    else:
        df_all = df_all.join(df)  # join aligns on the index
# print(df_all)

# Daily returns with the index collapsed to the calendar year, ready for
# grouping. 'Y' is the current alias for yearly periods ('A' is deprecated).
yearly_pct_change = df_all.pct_change().to_period('Y')

# Compound the daily returns inside each year; iloc[-1] is the cumulative
# return on the year's last row.
y_ret = (
    yearly_pct_change
    .groupby(yearly_pct_change.index)
    .apply(lambda x: ((1 + x).cumprod() - 1).iloc[-1])
).round(4)
print('年分组滚动收益率:\n', y_ret)

# Mean and standard deviation (both in percent) of the yearly returns of each
# ETF; these are the return and risk measures plotted below.
ret = []
vol = []
for row_name in stocks_info:
    row_values = y_ret[row_name['name']].dropna()
    ret.append(round(row_values.mean() * 100, 3))
    vol.append(round(row_values.std() * 100))
print('ret结果:\n', ret)
print('vol结果:\n', vol)

# One colour value per ETF; keep the length equal to len(stocks_info).
color = np.array([0.18, 0.96, 0.75, 0.3, 0.9, 0.5, 0.2, 0.6])
plt.scatter(ret, vol, marker='o', c=color, s=500, cmap=plt.get_cmap('Spectral'))
plt.xlabel("年化收益率%")
plt.ylabel("标准差%")

# Label every point with the ETF name.
stocks_names = [item['name'] for item in stocks_info]
for label, x, y in zip(stocks_names, ret, vol):
    plt.annotate(label, xy=(x, y), xytext=(20, 20), textcoords="offset points",
                 ha="right", va="bottom",
                 bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                 arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0"))
plt.show()
年分组滚动收益率:
沪深300 中证500 医药ETF 券商ETF 新能源ETF 红利ETF 黄金ETF 房地产ETF
date
2012 -0.0235 NaN NaN NaN NaN NaN NaN NaN
2013 -0.0756 0.1043 -0.0042 NaN NaN NaN -0.0952 NaN
2014 0.6888 0.3900 0.0335 NaN NaN NaN 0.0152 NaN
2015 0.0782 0.4757 0.3441 NaN NaN NaN -0.0746 NaN
2016 -0.1120 -0.1640 -0.0090 -0.0112 NaN NaN 0.1810 NaN
2017 0.2643 0.0056 0.2523 -0.0671 NaN NaN 0.0273 0.0179
2018 -0.2626 -0.3245 -0.1942 -0.2434 NaN -0.2092 0.0436 -0.2699
2019 0.4246 0.2818 0.4277 0.4488 NaN 0.4099 0.1975 0.3031
2020 0.3066 0.2345 0.5844 0.1776 NaN 0.2925 0.1377 -0.0663
2021 -0.0408 0.1725 -0.1571 -0.0343 0.4019 -0.0620 -0.0538 -0.0640
2022 -0.2044 -0.1835 -0.2449 -0.2618 -0.2727 -0.1679 0.0942 -0.1045
2023 -0.0968 -0.0625 -0.1402 0.0385 -0.3465 -0.0961 0.1661 -0.2694
ret结果:
[7.89, 8.454, 8.113, 0.589, -7.243, 2.787, 5.809, -6.473]
vol结果:
[28, 25, 28, 23, 41, 26, 11, 19]
从下面绘制出来的图,我们能知道什么信息?
- 新能源,真的是毫无节操,收益率平均最低,而且波动还最大!
- 券商,等于就是白玩,所以如果要持有,一定一定要高抛低吸!
- 沪深300,中证500,医药这样的标的,长期肯定是能挣钱的!而且还不低,平均能到8%!!!
- 黄金,收益率好,且波动率低,but,通过课外知识可知,这是跟康波周期有关,只是这货的波动,可能是以十几二十年为周期,还是少碰为好!一旦套牢,不知何时才能回来的那种。反正我是不看的。
1.1.4. 成交量与涨跌的关系
import ssl

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fonts that can render the Chinese labels; the first installed family wins
# ("Arial Unicode MS" on macOS, "SimHei" on Windows).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei"]

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...".
# NOTE(review): disables HTTPS certificate verification process-wide.
ssl._create_default_https_context = ssl._create_unverified_context


def label_direction(frame):
    """Return volume/return columns of *frame* plus an up/down/flat label.

    Rows whose daily return is NaN are dropped first, so they are not counted
    under any label. The result is an independent copy; *frame* is untouched.
    """
    labelled = frame[['volume', '日收益率']].dropna(subset=['日收益率']).copy()
    daily = labelled['日收益率']
    labelled['正负'] = np.select([daily > 0, daily < 0], ['正', '负'], default='平')
    return labelled


# Load the data.
sh300 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/510300.csv", parse_dates=['date'], index_col='date')
# sh300['volume'] = round(sh300['volume']/100000.0,2)  # optional down-scaling of volume
# Daily return in percent, recomputed from the close.
sh300["日收益率"] = (sh300["close"].pct_change() * 100).round(2)

# Scatter of daily return (y) against volume (x): when volume expands, are
# up days or down days more common?
sh300.plot.scatter(x="volume", y="日收益率")

# Days whose volume is above the mean volume: count up / down / flat days.
print('成交量均值=', int(sh300['volume'].mean()))
sample = label_direction(sh300[sh300['volume'] > sh300['volume'].mean()])
counts = sample['正负'].value_counts()
print(counts)
print('当放量(volume高于其平均值)时,上涨的概率是=', int(counts.get('正', 0)/counts.sum()*100), '%')

# Same statistic for days whose volume is above the 75th percentile.
p75 = np.percentile(sh300['volume'].values, q=75)
print('\n成交量75%百分位=', p75)
sample = label_direction(sh300[sh300['volume'] > p75])
counts = sample['正负'].value_counts()
print(counts)
print('当放量(75%百分位)时,上涨的概率是=', int(counts.get('正', 0)/counts.sum()*100), '%')

plt.show()
成交量均值= 4315678
正负
正 528
负 455
平 10
n 1
Name: count, dtype: int64
当放量(volume高于其平均值)时,上涨的概率是= 53 %
成交量75%百分位= 5304913.0
正负
正 362
负 334
平 8
n 1
Name: count, dtype: int64
当放量(75%百分位)时,上涨的概率是= 51 %
通过计算的结果可见,不管是通过计算均值以上的放量,还是75百分位以上的放量,上涨的概率都不高。(这个针对沪深300而言的结论)
1.1.5. 线性回归模型拟合sns.lmplot
这个听起来比较高大上,实际就是一个函数的运用,类似于在K线图上划线,有点经验的老司机应该都是能画出来的,而且更有的放矢。
后面还会针对这个,更进一步分析我们的划线y=ax+b,其中的a、b的计算方式。
再之后,还会尝试通过手动划线(知道2个点,计算其直线的斜率,再换算为年化收益率,这才更有所谓的底。)
import ssl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...".
# NOTE(review): disables HTTPS certificate verification process-wide.
ssl._create_default_https_context = ssl._create_unverified_context

sh300 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/510300.csv", parse_dates=['date'], index_col='date')

# Running day number 0..N-1 used as the regression's x variable. It is sized
# from the data so the assignment keeps working when the CSV gains rows.
sh300['day'] = np.arange(len(sh300))

sns.set_style("white")
# Scatter of close against day number with a fitted regression line and a
# 95% confidence band.
gridobj = sns.lmplot(x="day", y="close", data=sh300,
                     ci=95, scatter_kws={'color': 'orange'}, line_kws={'color': 'green'}, markers='o')
plt.show()
1.1.6. 皮尔森相关性系数sns.jointplot
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import ssl

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...": use an
# unverified HTTPS context for the downloads below.
ssl._create_default_https_context = ssl._create_unverified_context

# Close prices of the two ETFs side by side, aligned on the 510300 dates.
sh300 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/510300.csv", parse_dates=['date'], index_col='date')
sh500 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/510500.csv", parse_dates=['date'], index_col='date')
stock_index = sh300['close'].to_frame(name='510300')
stock_index['510500'] = sh500['close']
# print(stock_index)

# Daily percentage change; the first row has no previous day and is dropped.
tech_rets = stock_index.pct_change().iloc[1:]
# print('收益率:\n', tech_rets)

# Joint distribution of the two daily-return series.
sns.jointplot(data=tech_rets, x='510300', y='510500')
# sns.pairplot(tech_rets.iloc[:,:].dropna())
plt.show()
结果如下图。可见:510300和510500,基本是属于正相关的。同时也可以看出,这个图,是一个定性的描述,我们还可以通过其他方式计算出相关系数,并画出热力图heatmap。就是下一例。
1.1.7. 相关性热力图pandas.corr() + sns.heatmap
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import qstock as qs
import seaborn as sns

# Fonts that can render the Chinese labels; the first installed family wins
# ("Arial Unicode MS" on macOS, "SimHei" on Windows).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei"]

# ETFs to compare.
stocks_info = [
    {'code': '510300', 'name': '沪深300'},
    {'code': '510500', 'name': '中证500'},
    {'code': '512010', 'name': '医药ETF'},
    {'code': '512000', 'name': '券商ETF'},
    {'code': '516160', 'name': '新能源ETF'},
    {'code': '510800', 'name': '红利ETF'},
    {'code': '518880', 'name': '黄金ETF'},
    {'code': '512200', 'name': '房地产ETF'},
]

# Fetch the history of every ETF from qstock and keep it under 'history_df'.
for stock in stocks_info:
    stock['history_df'] = qs.get_data(stock['code'])

# Build one frame with a close-price column per ETF. rename() returns a new
# frame, which avoids the SettingWithCopyWarning of an inplace rename on a slice.
df_all = pd.DataFrame()
for stock in stocks_info:
    df = stock['history_df'][['close']].rename(columns={'close': stock['name']})
    if df_all.size == 0:
        df_all = df
    else:
        df_all = df_all.join(df)  # join aligns on the index

# Pairwise correlation of the close prices, computed once and reused for the
# cell values and both tick-label lists.
corr = df_all.corr()

# Plot
plt.figure(figsize=(12,10), dpi= 80)
sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns, cmap='RdYlGn', center=0, annot=True)

# Decorations
plt.title('指数相关性图', fontsize=22)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()
通过下图可以看出,如果追求极致的负相关,应考虑新能源和黄金。不过一般我们可能主流投资沪深300,那找沪深300的负相关,应该最佳是房地产ETF,不过还是要根据自己的实际情况来选择。
比如如果选沪深300和中证500,他俩相关系数是0.81,相关性还是很高的。
1.1.8. 树形图-持仓占比
import pandas as pd
import matplotlib.pyplot as plt
from pylab import mpl
import squarify

# Font able to render the Chinese labels (macOS).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS"]

# Position size per holding.
json1 = {
    "农业ETF": {"close": 2.6},
    "沪深300": {"close": 7},
    "券商ETF": {"close": 10},
    "医药ETF": {"close": 16},
    "新能源ETF": {"close": 12},
}
# One row per holding, single column 'close'.
df = pd.DataFrame.from_dict(json1, orient='index')

# Inputs for the treemap: names, areas, and one Spectral colour per holding.
labels = df.index
sizes = df['close']
n_labels = len(labels)
colors = [plt.cm.Spectral(idx / n_labels) for idx in range(n_labels)]

# Draw the treemap.
plt.figure(figsize=(12,8), dpi= 80)
squarify.plot(sizes=sizes, label=labels, color=colors, alpha=.8)

# Title, no axes.
plt.title('占比树形图')
plt.axis('off')
plt.show()
1.1.9. 柱状图or条形图
import random

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import colors as mcolors

# Fonts that can render the Chinese labels; the first installed family wins
# ("Arial Unicode MS" on macOS, "SimHei" on Windows).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei"]

# Prepare Data: position size per holding, one row per holding.
json1 = {
    "农业ETF": {"close": 2.6},
    "沪深300": {"close": 7},
    "券商ETF": {"close": 10},
    "医药ETF": {"close": 16},
    "新能源ETF": {"close": 12},
}
df = pd.DataFrame(json1).T

# One random named colour per bar. The seed keeps the choice reproducible and
# len(df) counts rows (df.size would count rows x columns).
all_colors = list(mcolors.cnames.keys())
random.seed(100)
c = random.choices(all_colors, k=len(df))

# Plot Bars
plt.figure(figsize=(16,10), dpi= 80)
plt.bar(df.index, df['close'], color=c, width=.5)
# Write each value just above its bar.
for i, val in enumerate(df['close'].values):
    plt.text(i, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':12})

# Decoration: rotate the existing category labels rather than replacing the
# label texts, which would require fixing the tick positions first.
plt.xticks(rotation=60, horizontalalignment='right')
plt.title("柱状图or条形图", fontsize=22)
plt.ylabel('仓位值')
plt.ylim(0, 20)
plt.show()
1.1.10. 时间序列图 折线图
todo:辅助Y轴;2个不同尺度的标准化。
import pandas as pd
import matplotlib.pyplot as plt
from pylab import mpl
import ssl

# Font able to render the Chinese title (macOS).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS"]
# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...": use an
# unverified HTTPS context for the downloads below.
ssl._create_default_https_context = ssl._create_unverified_context

# Load both series; 'date' stays a plain string column here.
df = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/510300.csv")
df2 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/510500.csv")

# One line per ETF on a shared figure.
plt.figure(figsize=(16,10), dpi= 80)
for frame, line_color in ((df, 'tab:red'), (df2, 'tab:green')):
    plt.plot('date', 'close', data=frame, color=line_color)

# Fixed price range; one x tick every 250 rows, labelled with the first four
# characters of that row's date string.
plt.ylim(0, 10)
tick_step = 250
xtick_location = list(range(0, len(df), tick_step))
xtick_labels = [date_text[0:4] for date_text in df['date'].iloc[::tick_step]]
plt.xticks(ticks=xtick_location, labels=xtick_labels, rotation=0, fontsize=12, horizontalalignment='center', alpha=.7)
plt.yticks(fontsize=12, alpha=.7)
plt.title("时间序列图 折线图", fontsize=22)
plt.grid(axis='both', alpha=.3)

# Hide the top/right borders and fade the bottom/left ones.
axes = plt.gca()
for side, border_alpha in (("top", 0.0), ("bottom", 0.3), ("right", 0.0), ("left", 0.3)):
    axes.spines[side].set_alpha(border_alpha)
plt.show()
1.1.11. 未堆积的面积图 (Area Chart UnStacked)
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fonts that can render the Chinese labels; the first installed family wins
# ("Arial Unicode MS" on macOS, "SimHei" on Windows).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei"]

df1 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300-close.csv")
df2 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510500-close.csv")
# Outer merge on date, then drop rows with a missing value: only dates with
# both closes remain. The merge suffixes give close_x (510300) and close_y (510500).
df = df1.merge(df2, on="date", how="outer").dropna()
# df['date'] = pd.to_datetime(df['date'])
print(df)

# Prepare Data
x = df['date'].values.tolist()      # date strings
y1 = df['close_x'].values.tolist()  # 510300 close
y2 = df['close_y'].values.tolist()  # 510500 close
mycolors = ['tab:red', 'tab:blue', 'tab:green', 'tab:orange', 'tab:brown', 'tab:grey', 'tab:pink', 'tab:olive']
columns = ['中证500', '沪深300']

# Draw Plot: two translucent areas drawn from zero, not stacked.
fig, ax = plt.subplots(1, 1, figsize=(16,9), dpi= 80)
ax.fill_between(x, y1=y1, y2=0, label=columns[1], alpha=0.5, color=mycolors[1], linewidth=2)
ax.fill_between(x, y1=y2, y2=0, label=columns[0], alpha=0.5, color=mycolors[0], linewidth=2)

# Decorations
ax.set_title('未堆积的面积图 (Area Chart UnStacked)', fontsize=18)
ax.set(ylim=[0, 10])
ax.legend(loc='best', fontsize=12)
plt.xticks(x[::260], fontsize=10, horizontalalignment='center')
plt.yticks(np.arange(0, 10.0, 2.0), fontsize=10)
# x holds strings, so matplotlib plots them as categories at positions
# 0..len(x)-1. The lower limit -5 leaves a small gap left of the first date;
# the upper limit is the last date string, i.e. the last category.
plt.xlim(-5, x[-1])
print(x[-1])

# Draw Tick lines: a dashed horizontal guide at every y tick.
for y in np.arange(0, 10.0, 2.0):
    plt.hlines(y, xmin=0, xmax=len(x), colors='black', alpha=0.3, linestyles="--", lw=0.5)

# Lighten borders
plt.gca().spines["top"].set_alpha(0)
plt.gca().spines["bottom"].set_alpha(.3)
plt.gca().spines["right"].set_alpha(0)
plt.gca().spines["left"].set_alpha(.3)
plt.show()
date close_x close_y
195 2013/3/15 2.1684 3.0215
196 2013/3/18 2.1452 2.9717
197 2013/3/19 2.1641 2.9904
198 2013/3/20 2.2301 3.0683
199 2013/3/21 2.2361 3.0994
... ... ... ...
2792 2023/11/20 3.6450 5.7580
2793 2023/11/21 3.6500 5.7410
2794 2023/11/22 3.6110 5.6680
2795 2023/11/23 3.6300 5.7190
2796 2023/11/24 3.6050 5.6730
[2600 rows x 3 columns]
2023/11/24
1.1.12. 日历图-收盘价高低热分布
import calmap
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fonts that can render the Chinese titles; the first installed family wins
# ("Arial Unicode MS" on macOS, "SimHei" on Windows).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei"]

# Import Data
df = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300-close.csv", parse_dates=['date'])
df.set_index('date', inplace=True)

# Plot one calendar heat map of the closing price per year. The title is built
# from the year being plotted, so each figure is labelled with its own year.
# calendarplot creates its own figure from fig_kws, so no separate
# plt.figure() call is made here.
for year in ('2020', '2021', '2022', '2023'):
    calmap.calendarplot(df.loc[year]['close'],
                        fig_kws={'figsize': (16,10)},
                        yearlabel_kws={'color':'black', 'fontsize':14},
                        subplot_kws={'title': f'沪深300 {year}年日历图'})
plt.show()
1.1.13. 热力图sns.heatmap-逐年收益率
将多只股票的日收益率按年度复利累乘,得到各自的年度收益率,再用热力图按年度查看各股票的年收益率
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import qstock as qs
import seaborn as sns

# Fonts that can render the Chinese labels; the first installed family wins
# ("Arial Unicode MS" on macOS, "SimHei" on Windows).
mpl.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei"]

# ETFs to compare.
stocks_info = [
    {'code': '510300', 'name': '沪深300'},
    {'code': '510500', 'name': '中证500'},
    {'code': '512010', 'name': '医药ETF'},
    {'code': '512000', 'name': '券商ETF'},
    {'code': '516160', 'name': '新能源ETF'},
    {'code': '510800', 'name': '红利ETF'},
    # {'code': '518880', 'name': '黄金ETF'},
    # {'code': '512200', 'name': '房地产ETF'}
]

# Fetch the history of every ETF from qstock and keep it under 'history_df'.
for stock in stocks_info:
    stock['history_df'] = qs.get_data(stock['code'])

# Build one frame with a close-price column per ETF. rename() returns a new
# frame, which avoids the SettingWithCopyWarning of an inplace rename on a slice.
df_all = pd.DataFrame()
for stock in stocks_info:
    df = stock['history_df'][['close']].rename(columns={'close': stock['name']})
    if df_all.size == 0:
        df_all = df
    else:
        df_all = df_all.join(df)  # join aligns on the index
print(df_all)

# Daily returns with the index collapsed to the calendar year, ready for
# grouping. 'Y' is the current alias for yearly periods ('A' is deprecated).
yearly_pct_change = df_all.pct_change().to_period('Y')
print('退化日期到年:', yearly_pct_change)

# Compound the daily returns inside each year; iloc[-1] is the cumulative
# return on the year's last row.
y_ret = (
    yearly_pct_change
    .groupby(yearly_pct_change.index)
    .apply(lambda x: ((1 + x).cumprod() - 1).iloc[-1])
).round(4)
print('年分组滚动收益率:', y_ret)

# Heat map of the yearly returns in percent (rows: years, columns: ETFs).
plt.figure(figsize=(10, 8))
plt.title('年化统计')
sns.heatmap(y_ret*100, annot=True, linewidths=0.5)
plt.show()
沪深300 中证500 医药ETF 券商ETF 新能源ETF 红利ETF
date
2012-05-28 2.004 NaN NaN NaN NaN NaN
2012-05-29 2.044 NaN NaN NaN NaN NaN
2012-05-30 2.036 NaN NaN NaN NaN NaN
2012-05-31 2.030 NaN NaN NaN NaN NaN
2012-06-01 2.030 NaN NaN NaN NaN NaN
... ... ... ... ... ... ...
2023-12-28 3.489 5.475 0.410 0.863 0.650 1.035
2023-12-29 3.499 5.519 0.411 0.864 0.645 1.035
2024-01-02 3.453 5.497 0.407 0.854 0.631 1.024
2024-01-03 3.443 5.490 0.405 0.855 0.634 1.026
2024-01-04 3.413 5.446 0.401 0.847 0.621 1.018
[2825 rows x 6 columns]
退化日期到年: 沪深300 中证500 医药ETF 券商ETF 新能源ETF 红利ETF
date
2012 NaN NaN NaN NaN NaN NaN
2012 0.019960 NaN NaN NaN NaN NaN
2012 -0.003914 NaN NaN NaN NaN NaN
2012 -0.002947 NaN NaN NaN NaN NaN
2012 0.000000 NaN NaN NaN NaN NaN
... ... ... ... ... ... ...
2023 0.024670 0.019173 0.017370 0.022512 0.069079 0.021718
2023 0.002866 0.008037 0.002439 0.001159 -0.007692 0.000000
2024 -0.013147 -0.003986 -0.009732 -0.011574 -0.021705 -0.010628
2024 -0.002896 -0.001273 -0.004914 0.001171 0.004754 0.001953
2024 -0.008713 -0.008015 -0.009877 -0.009357 -0.020505 -0.007797
[2825 rows x 6 columns]
年分组滚动收益率: 沪深300 中证500 医药ETF 券商ETF 新能源ETF 红利ETF
date
2012 -0.0235 NaN NaN NaN NaN NaN
2013 -0.0756 0.1043 -0.0042 NaN NaN NaN
2014 0.6888 0.3900 0.0335 NaN NaN NaN
2015 0.0782 0.4757 0.3441 NaN NaN NaN
2016 -0.1120 -0.1640 -0.0090 -0.0112 NaN NaN
2017 0.2643 0.0056 0.2523 -0.0671 NaN NaN
2018 -0.2626 -0.3245 -0.1942 -0.2434 NaN -0.2092
2019 0.4246 0.2818 0.4277 0.4488 NaN 0.4099
2020 0.3066 0.2345 0.5844 0.1776 NaN 0.2925
2021 -0.0408 0.1725 -0.1571 -0.0343 0.4019 -0.0620
2022 -0.2044 -0.1835 -0.2449 -0.2618 -0.2727 -0.1679
2023 -0.0968 -0.0625 -0.1402 0.0385 -0.3465 -0.0961
2024 -0.0246 -0.0132 -0.0243 -0.0197 -0.0372 -0.0164
1.2. pyecharts
1.2.1. 官网
手册:https://pyecharts.org/#/zh-cn/intro
中文样例:https://gallery.pyecharts.org/#/README
apache 网站样例:https://echarts.apache.org/examples/en/index.html
图例:
1.2.2. 收盘价走势图(折线图)
# Data-analysis libraries used across these notebook cells.
import pandas as pd
import numpy as np
import talib as ta
import ssl

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...".
# NOTE(review): disables HTTPS certificate verification process-wide.
ssl._create_default_https_context = ssl._create_unverified_context

# pyecharts
from pyecharts.charts import Line
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode

df = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300-close.csv", parse_dates=['date'])
df.set_index('date', inplace=True)

# pyecharts serialises the chart options to JSON, so both axes are handed over
# as plain Python lists rather than pandas objects.
g = (
    Line()
    .add_xaxis(df.index.strftime('%Y%m%d').tolist())
    .add_yaxis('', df['close'].tolist())
)
g.render_notebook()
# Data-analysis libraries used across these notebook cells.
import pandas as pd
import numpy as np
import talib as ta
import ssl

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...".
# NOTE(review): disables HTTPS certificate verification process-wide.
ssl._create_default_https_context = ssl._create_unverified_context

# pyecharts
from pyecharts.charts import Line
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode

# Load the 510300 ETF close history.
sh = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300-close.csv", parse_dates=['date'])
sh.set_index('date', inplace=True)

# Colour the line by price band, with the quartiles of the close as band
# edges. The closes are single-digit prices, so the edges are rounded to two
# decimals; rounding up to whole numbers would make neighbouring bands coincide.
des = sh.close.describe()
print(des)
v1, v2, v3 = (round(float(des[q]), 2) for q in ('25%', '50%', '75%'))
pieces = [
    {"min": v3, "color": "red"},
    {"min": v2, "max": v3, "color": "blue"},
    {"min": v1, "max": v2, "color": "black"},
    {"max": v1, "color": "green"},
]

# Chained calls inside one pair of parentheses.
g = (
    Line({'width': '100%', 'height': '480px'})  # canvas size
    .add_xaxis(xaxis_data=sh.index.strftime('%Y%m%d').tolist())
    .add_yaxis(
        series_name="",
        y_axis=sh.close.values.tolist(),
        is_smooth=True,  # smooth curve
        is_symbol_show=False,  # no marker on each data point
        label_opts=opts.LabelOpts(is_show=False),
        linestyle_opts=opts.LineStyleOpts(width=2),
        # Mark the maximum and minimum points.
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_='max', name='最大值'),
                opts.MarkPointItem(type_='min', name='最小值'),
            ],
            symbol_size=[100, 30],
        ),
        # Horizontal guide line at the average.
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average")],
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='沪深300ETF走势', subtitle='2012年-2023年', pos_left='center'),
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
        # Piecewise visual map that applies the colour bands defined above.
        visualmap_opts=opts.VisualMapOpts(
            orient="horizontal", split_number=4,
            pos_left='center', is_piecewise=True,
            pieces=pieces,
        ),
    )
    .set_series_opts(
        # Shade the labelled date range.
        markarea_opts=opts.MarkAreaOpts(
            data=[
                # opts.MarkAreaItem(name="牛市", x=("20050606", "20071016")),
                opts.MarkAreaItem(name="14-15牛市", x=("20140624", "20150612")),
            ],
        )
    )
)
# Display the chart in the Jupyter notebook.
g.render_notebook()
1.2.3. 柱状图1 - 沪深300年化收益率
# Data-analysis libraries used across these notebook cells.
import pandas as pd
import numpy as np
import talib as ta
import ssl

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...": use an
# unverified HTTPS context for the download below.
ssl._create_default_https_context = ssl._create_unverified_context

# pyecharts
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode


def _compound(daily_returns):
    """Return the cumulative compounded return on the last row of the group."""
    return ((1 + daily_returns).cumprod() - 1).iloc[-1]


df = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300-close.csv", parse_dates=['date'])
df.set_index('date', inplace=True)
df = df[['close']]  # keep only the close column

# Daily returns (same as df / df.shift(1) - 1), index reduced to the year.
index_ret = df.pct_change()
ss = index_ret.to_period('Y')
print('ss值是: \n', ss)

# Yearly return in percent: compound the daily returns inside each year.
per_year = ss.groupby(ss.index).apply(_compound)
sss = (per_year * 100).round(2)
print(sss)

# Bar chart: one bar per year.
g = Bar()
g.add_xaxis(sss.index.strftime('%Y').tolist())
g.add_yaxis("", sss['close'].tolist())
g.set_global_opts(
    title_opts=opts.TitleOpts(title='沪深300ETF年化走势', subtitle='2012年-2023年', pos_left='center'),
)
g.render_notebook()
ss值是:
close
date
2012 NaN
2012 0.015395
2012 -0.003041
2012 -0.002288
2012 0.000000
... ...
2023 0.003027
2023 0.001372
2023 -0.010685
2023 0.005262
2023 -0.006887
[2797 rows x 1 columns]
close
date
2012 -1.72
2013 -5.86
2014 53.76
2015 6.84
2016 -9.71
2017 23.37
2018 -24.15
2019 38.60
2020 29.08
2021 -4.02
2022 -20.14
2023 -7.02
1.2.4. 柱状图2 - 沪深300&中证500年化收益率
# Data-analysis libraries used across these notebook cells.
import pandas as pd
import numpy as np
import talib as ta
import ssl

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...".
# NOTE(review): disables HTTPS certificate verification process-wide.
ssl._create_default_https_context = ssl._create_unverified_context

# pyecharts
from pyecharts.charts import Bar
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode

sh510300 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300-close.csv", parse_dates=['date'])
sh510500 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510500-close.csv", parse_dates=['date'])
sh510300.columns = ['date', '沪深300']
sh510500.columns = ['date', '中证500']

# Both closes side by side, keeping every date that appears in either file.
df_all = sh510300.merge(sh510500, on='date', how="outer")
df_all.set_index('date', inplace=True)

# Daily returns (same as df / df.shift(1) - 1), index reduced to the year.
index_ret = df_all.pct_change()
ss = index_ret.to_period('Y')
print('ss值是: \n', ss)

# Yearly return in percent: compound the daily returns inside each year;
# iloc[-1] is the cumulative value on the year's last row.
sss = (ss.groupby(ss.index).apply(lambda x: ((1 + x).cumprod() - 1).iloc[-1]) * 100).round(2)
print(sss)

g = (
    Bar()
    .add_xaxis(sss.index.strftime('%Y').tolist())
    .add_yaxis("沪深300", sss['沪深300'].tolist(), gap="0%")
    .add_yaxis("中证500", sss['中证500'].tolist(), gap="0%")
    # Global options. The bars are yearly returns, so the title says "年".
    .set_global_opts(
        title_opts=opts.TitleOpts(title="指数年收益率"),
        datazoom_opts=opts.DataZoomOpts(),  # zoom slider
        yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(formatter="{value}%")),
    )
    # Series options: show each value as a percentage label.
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=True, formatter='{c}%'),
    )
)
g.width = "100%"  # canvas width
g.render_notebook()
ss值是:
沪深300 中证500
date
2012 NaN NaN
2012 0.015395 NaN
2012 -0.003041 NaN
2012 -0.002288 NaN
2012 0.000000 NaN
... ... ...
2023 0.003027 0.005589
2023 0.001372 -0.002952
2023 -0.010685 -0.012716
2023 0.005262 0.008998
2023 -0.006887 -0.008043
[2797 rows x 2 columns]
沪深300 中证500
date
2012 -1.72 NaN
2013 -5.86 10.42
2014 53.76 39.03
2015 6.84 47.55
2016 -9.71 -16.39
2017 23.37 0.55
2018 -24.15 -32.45
2019 38.60 28.17
2020 29.08 23.45
2021 -4.02 17.25
2022 -20.14 -18.35
2023 -7.02 -3.64
1.2.5. 普通k线图
# Data-analysis libraries used across these notebook cells.
import pandas as pd
import numpy as np
import talib as ta
import ssl

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...": use an
# unverified HTTPS context for the download below.
ssl._create_default_https_context = ssl._create_unverified_context

# pyecharts
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode

df = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300.csv", parse_dates=['date'])
df.set_index('date', inplace=True)

# Indicator columns: 5- and 20-row rolling means of the close, plus MACD.
for window in (5, 20):
    df[f'ma{window}'] = df.close.rolling(window).mean()
macd_line, macd_signal, macd_hist = ta.MACD(df.close, fastperiod=12, slowperiod=26, signalperiod=9)
df['macd'] = macd_line
df['macdsignal'] = macd_signal
df['macdhist'] = macd_hist

# Candlestick chart of the rows from 2023 onwards. Each candle is passed as
# [open, close, low, high].
dates_2023 = df['2023':].index.strftime('%Y%m%d').tolist()
candles_2023 = df[['open', 'close', 'low', 'high']]['2023':].values.tolist()
g = Kline()
g.add_xaxis(dates_2023)
g.add_yaxis("", y_axis=candles_2023)
g.render_notebook()
1.2.6. 可缩放k线图
# Data-analysis libraries used across these notebook cells.
import pandas as pd
import numpy as np
import talib as ta
import ssl

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...".
# NOTE(review): disables HTTPS certificate verification process-wide.
ssl._create_default_https_context = ssl._create_unverified_context

# pyecharts
from pyecharts.charts import Kline
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode

df = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300.csv", parse_dates=['date'])
df.set_index('date', inplace=True)
df['ma5'] = df.close.rolling(5).mean()
df['ma20'] = df.close.rolling(20).mean()
df['macd'], df['macdsignal'], df['macdhist'] = ta.MACD(df.close, fastperiod=12, slowperiod=26, signalperiod=9)


def draw_kline(data):
    """Build a zoomable candlestick chart from *data*.

    *data* needs a datetime-like index (it is formatted with ``strftime``)
    and the columns ``open``, ``close``, ``low`` and ``high``.
    Returns the configured pyecharts ``Kline`` chart.
    """
    g = (
        Kline()
        .add_xaxis(data.index.strftime('%Y%m%d').tolist())
        # Each candle is passed as [open, close, low, high].
        .add_yaxis(
            series_name="",
            y_axis=data[['open', 'close', 'low', 'high']].values.tolist(),
            itemstyle_opts=opts.ItemStyleOpts(
                color="red",  # rising candle
                color0="green",  # falling candle
                border_color="red",
                border_color0="green",
            ),
            # Mark the maximum and minimum points.
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(type_='max', name='最大值'),
                    opts.MarkPointItem(type_='min', name='最小值'),
                ]
            ),
            # Guide line at the average of the close dimension.
            markline_opts=opts.MarkLineOpts(
                data=[opts.MarkLineItem(type_="average", value_dim="close")],
            ),
        )
        .set_global_opts(
            datazoom_opts=[opts.DataZoomOpts()],  # zoom slider
            title_opts=opts.TitleOpts(title="股票K线图", pos_left='center'),
        )
    )
    return g


draw_kline(df).render_notebook()
1.2.7. 散点图 - 历年收益率
# Data-analysis libraries used across these notebook cells.
import pandas as pd
import numpy as np
import talib as ta
import ssl

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...": use an
# unverified HTTPS context for the downloads below.
ssl._create_default_https_context = ssl._create_unverified_context

# pyecharts
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode


def _compound(daily_returns):
    """Return the cumulative compounded return on the last row of the group."""
    return ((1 + daily_returns).cumprod() - 1).iloc[-1]


sh510300 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300-close.csv", parse_dates=['date'])
sh510500 = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510500-close.csv", parse_dates=['date'])
sh510300.columns = ['date', '沪深300']
sh510500.columns = ['date', '中证500']

# Both closes side by side, keeping every date that appears in either file.
df_all = sh510300.merge(sh510500, on='date', how="outer")
df_all.set_index('date', inplace=True)

# Daily returns (same as df / df.shift(1) - 1), index reduced to the year.
index_ret = df_all.pct_change()
ss = index_ret.to_period('Y')
print('ss值是: \n', ss)

# Yearly return in percent: compound the daily returns inside each year.
per_year = ss.groupby(ss.index).apply(_compound)
sss = (per_year * 100).round(2)
print(sss)

# Scatter chart: one point per year and index.
year_labels = list(map(str, sss.index.year))
g = Scatter()
g.add_xaxis(year_labels)
g.add_yaxis("沪深300(%)", sss['沪深300'].tolist())
g.add_yaxis("中证500(%)", sss['中证500'].tolist())
g.set_global_opts(
    title_opts=opts.TitleOpts(title="指数历年收益率"),
    visualmap_opts=opts.VisualMapOpts(type_="size", is_show=False),
    xaxis_opts=opts.AxisOpts(
        type_="category",
        axisline_opts=opts.AxisLineOpts(is_on_zero=False),
    ),
    yaxis_opts=opts.AxisOpts(is_show=True),
)
g.width = "100%"
g.render_notebook()
ss值是:
沪深300 中证500
date
2012 NaN NaN
2012 0.015395 NaN
2012 -0.003041 NaN
2012 -0.002288 NaN
2012 0.000000 NaN
... ... ...
2023 0.003027 0.005589
2023 0.001372 -0.002952
2023 -0.010685 -0.012716
2023 0.005262 0.008998
2023 -0.006887 -0.008043
[2797 rows x 2 columns]
沪深300 中证500
date
2012 -1.72 NaN
2013 -5.86 10.42
2014 53.76 39.03
2015 6.84 47.55
2016 -9.71 -16.39
2017 23.37 0.55
2018 -24.15 -32.45
2019 38.60 28.17
2020 29.08 23.45
2021 -4.02 17.25
2022 -20.14 -18.35
2023 -7.02 -3.64
1.2.8. 热力图heatmap - 沪深300各月收益率
# Data-analysis libraries used across these notebook cells.
import pandas as pd
import numpy as np
import talib as ta
import ssl

# Workaround for "URLError: [SSL: CERTIFICATE_VERIFY_FAILED] ...".
# NOTE(review): disables HTTPS certificate verification process-wide.
ssl._create_default_https_context = ssl._create_unverified_context

# pyecharts
from pyecharts.charts import HeatMap
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode

df = pd.read_csv("https://gitee.com/kelvin11/public-resources/raw/master/SH510300-close.csv", parse_dates=['date'])
df.set_index('date', inplace=True)
index_price = df

# Daily returns with the index reduced to year-month, then compounded inside
# each month and expressed in percent.
heat_data = (index_price / index_price.shift(1) - 1).to_period('M')
heat_data = heat_data.groupby(heat_data.index).apply(lambda x: ((((1 + x).cumprod() - 1).iloc[-1]) * 100).round(2))
heat_data = heat_data['2012':'2023']
print(heat_data)

# First and last year present, and the list of years for the x axis.
min_year = min(heat_data.index.year.tolist())
max_year = max(heat_data.index.year.tolist())
years = np.arange(min_year, max_year + 1, 1)

# One [x, y, value] cell per (year, month): x is the year offset and y the
# month offset. The year loop runs over the number of years and the month loop
# over the 12 months, so every cell is covered for any year span. Months
# without data yield None from .get().
value = [
    [i, j, heat_data['close'].get(str(min_year + i) + '-' + str(1 + j))]
    for i in range(max_year - min_year + 1)
    for j in range(12)
]
month = [str(i) + '月' for i in range(1, 13)]  # y-axis labels

# Draw the heat map.
g = (
    HeatMap()
    .add_xaxis([str(i) for i in years])
    .add_yaxis(
        "", month, value,
        label_opts=opts.LabelOpts(is_show=True, position="inside"),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="沪深300月收益率(%)"),
        visualmap_opts=opts.VisualMapOpts(is_show=False, min_=-30, max_=30),
    )
)
g.render_notebook()
close
date
2012-05 1.00
2012-06 -5.51
2012-07 -4.59
2012-08 -5.40
2012-09 4.23
... ...
2023-07 5.20
2023-08 -6.00
2023-09 -1.95
2023-10 -3.19
2023-11 -1.07
[139 rows x 1 columns]
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。 如若内容造成侵权/违法违规/事实不符,请联系我的编程经验分享网邮箱:veading@qq.com进行投诉反馈,一经查实,立即删除!