量化交易系列【2】Pandas数据筛选及处理相关操作
示例中使用的‘000001.XSHE.csv’文件,已上传至csdn资源中,可直接下载
import pandas as pd
df = pd.read_csv('./000001.XSHE.csv')
df = df[:5]
df
| date | open | close | high | low | volume | money |
|---|
| 0 | 2015/1/5 | 9.98 | 10.00 | 10.17 | 9.74 | 458099037 | 4.565388e+09 |
|---|
| 1 | 2015/1/6 | 9.90 | 9.85 | 10.23 | 9.71 | 346952496 | 3.453446e+09 |
|---|
| 2 | 2015/1/7 | 9.72 | 9.67 | 9.88 | 9.55 | 272274401 | 2.634796e+09 |
|---|
| 3 | 2015/1/8 | 9.68 | 9.34 | 9.72 | 9.30 | 225445502 | 2.128003e+09 |
|---|
| 4 | 2015/1/9 | 9.30 | 9.42 | 9.91 | 9.19 | 401736419 | 3.835378e+09 |
|---|
df['code'] = '000001.XSHE'
筛选数据
df[df['code']=='000001.XSHE']
df[df['code'].isin(['000001.XSHE','000002.XSHE'])]
df[df['close'] >= 9.0]
| date | open | close | high | low | volume | money | code |
|---|
| 0 | 2015/1/5 | 9.98 | 10.00 | 10.17 | 9.74 | 458099037 | 4.565388e+09 | 000001.XSHE |
|---|
| 1 | 2015/1/6 | 9.90 | 9.85 | 10.23 | 9.71 | 346952496 | 3.453446e+09 | 000001.XSHE |
|---|
| 2 | 2015/1/7 | 9.72 | 9.67 | 9.88 | 9.55 | 272274401 | 2.634796e+09 | 000001.XSHE |
|---|
| 3 | 2015/1/8 | 9.68 | 9.34 | 9.72 | 9.30 | 225445502 | 2.128003e+09 | 000001.XSHE |
|---|
| 4 | 2015/1/9 | 9.30 | 9.42 | 9.91 | 9.19 | 401736419 | 3.835378e+09 | 000001.XSHE |
|---|
df['date'] = pd.to_datetime(df['date'])
df[(df.date>='2015-01-05') & (df.date<='2015-01-07')]
| date | open | close | high | low | volume | money | code |
|---|
| 0 | 2015-01-05 | 9.98 | 10.00 | 10.17 | 9.74 | 458099037 | 4.565388e+09 | 000001.XSHE |
|---|
| 1 | 2015-01-06 | 9.90 | 9.85 | 10.23 | 9.71 | 346952496 | 3.453446e+09 | 000001.XSHE |
|---|
| 2 | 2015-01-07 | 9.72 | 9.67 | 9.88 | 9.55 | 272274401 | 2.634796e+09 | 000001.XSHE |
|---|
df.dropna(how='any')
df.dropna(subset=['涨跌幅','MACD死叉’],how='all') # 表示 '涨跌幅','MACD死叉’这两个值均为空就删除改行
补全缺失值fillna
df.fillna(value='0')
df['MACD金叉死叉'].fillna(value=df['收盘价'], inplace=True)
df.fillna(method='ffill')
df.fillna(method='bfill')
df[df['涨跌幅'].notnull()]
df[df['涨跌幅'].isnull()]
排序函数
df.reset_index(inplace=True)
df.sort_values(by=['交易日期'],ascending=True)
df.sort_values(by=['股票名称','交易日期'], ascending=[True, True])
合并操作
df1 = df.iloc[0:2]
df1
| date | open | close | high | low | volume | money |
|---|
| 0 | 2015/1/5 | 9.98 | 10.00 | 10.17 | 9.74 | 458099037 | 4.565388e+09 |
|---|
| 1 | 2015/1/6 | 9.90 | 9.85 | 10.23 | 9.71 | 346952496 | 3.453446e+09 |
|---|
df2 = df.iloc[3:5]
df2
| date | open | close | high | low | volume | money |
|---|
| 3 | 2015/1/8 | 9.68 | 9.34 | 9.72 | 9.30 | 225445502 | 2.128003e+09 |
|---|
| 4 | 2015/1/9 | 9.30 | 9.42 | 9.91 | 9.19 | 401736419 | 3.835378e+09 |
|---|
df3 = df1.append(df2,ignore_index=True)
df3
| date | open | close | high | low | volume | money |
|---|
| 0 | 2015/1/5 | 9.98 | 10.00 | 10.17 | 9.74 | 458099037 | 4.565388e+09 |
|---|
| 1 | 2015/1/6 | 9.90 | 9.85 | 10.23 | 9.71 | 346952496 | 3.453446e+09 |
|---|
| 2 | 2015/1/8 | 9.68 | 9.34 | 9.72 | 9.30 | 225445502 | 2.128003e+09 |
|---|
| 3 | 2015/1/9 | 9.30 | 9.42 | 9.91 | 9.19 | 401736419 | 3.835378e+09 |
|---|
去除重复数据
df3.append(df2)
| date | open | close | high | low | volume | money |
|---|
| 0 | 2015/1/5 | 9.98 | 10.00 | 10.17 | 9.74 | 458099037 | 4.565388e+09 |
|---|
| 1 | 2015/1/6 | 9.90 | 9.85 | 10.23 | 9.71 | 346952496 | 3.453446e+09 |
|---|
| 2 | 2015/1/8 | 9.68 | 9.34 | 9.72 | 9.30 | 225445502 | 2.128003e+09 |
|---|
| 3 | 2015/1/9 | 9.30 | 9.42 | 9.91 | 9.19 | 401736419 | 3.835378e+09 |
|---|
| 3 | 2015/1/8 | 9.68 | 9.34 | 9.72 | 9.30 | 225445502 | 2.128003e+09 |
|---|
| 4 | 2015/1/9 | 9.30 | 9.42 | 9.91 | 9.19 | 401736419 | 3.835378e+09 |
|---|
df3.drop_duplicates(subset=['close','open'],keep='last',inplace=True)
df3
| date | open | close | high | low | volume | money |
|---|
| 0 | 2015/1/5 | 9.98 | 10.00 | 10.17 | 9.74 | 458099037 | 4.565388e+09 |
|---|
| 1 | 2015/1/6 | 9.90 | 9.85 | 10.23 | 9.71 | 346952496 | 3.453446e+09 |
|---|
| 2 | 2015/1/8 | 9.68 | 9.34 | 9.72 | 9.30 | 225445502 | 2.128003e+09 |
|---|
| 3 | 2015/1/9 | 9.30 | 9.42 | 9.91 | 9.19 | 401736419 | 3.835378e+09 |
|---|