数据诊断分析(均值方差)
- 均值方差检测是一种简单但有效的异常检测方法,主要基于样本的均值和方差的统计信息。该方法的核心思想是假设正常的样本点应该聚集在某个区域,而异常点则可能远离这个区域。
-
3
σ
3\sigma
3σ准则:是一种统计学中常用的规则,用于衡量数据集中的值离均值的距离。该规则基于正态分布的性质,提供了一个衡量数据集中值的离散程度的指标。
- 具体来说,
3
σ
3\sigma
3σ准则表明:
- 对于一个正态分布的数据集,约有 68% 的数据值会落在均值的一个标准差范围内。
- 约有 95% 的数据值会落在均值的两个标准差范围内。
- 约有 99.7% 的数据值会落在均值的三个标准差范围内。
Matlab代码实现
clear;clc;close all
load('.\data\daily_sia_7920.mat');
sia=min(cdr_sia./1e11);
for ii=1:42
sia_day(ii)=find(cdr_sia(:,ii)./1e11==sia(ii));
end
msia=mean(sia_day);
ssia=std(sia_day);
siz=25;lind=1.5;
x_0=0.10;
y_0=0.70;
len=0.85;
width=0.25;
d_x=0.32;
d_y=-0.27;
px=[0 0 0 0];
py=[0 1 2 3];
set(gcf,'color',[1 1 1],'position',[10 45 800 800*1.2]);
axes('position',[x_0+d_x*px(1), y_0+d_y*py(1), len, width]);
plot(sia_day,'k-*','linewidth',lind);hold on
plot([1 42],[msia,msia],'k-','linewidth',lind);hold on
plot([1 42],[msia+ssia,msia+ssia],'k--','linewidth',lind);hold on
plot([1 42],[msia-ssia,msia-ssia],'k--','linewidth',lind);hold on
C1=sia_day;C1(C1>msia-ssia)=nan;
C3=sia_day;C3(C3<msia+ssia)=nan;
scatter([1:42],C1,60,'b','filled','s');hold on
scatter([1:42],C3,60,'r','filled','^');hold on
set(gca,'linewidth',lind);grid on
set(gca,'xlim',[0 43],'xtick',1:5:42,'xticklabel','',...
'fontname','Times New Roman','FontSize',siz-10,'fontweight','bold')
set(gca,'ylim',[210 280],'ytick',210:20:280,'yticklabel',num2str([210:20:280]','
'Times New Roman','FontSize',siz-10,'fontweight','bold');
ylabel('Day of year (d)','fontname','Times New Roman',...
'FontSize',siz-10,'fontweight','bold');
hh=get(gca);
X=hh.XLim;
Y=hh.YLim;
k1=[0.03 0.8];
k2=[0.3 0.8];
k3=[0.03 0.9];
x_2=X(1)+k2(1)*(X(2)-X(1));
y_2=Y(1)+k2(2)*(Y(2)-Y(1));
x_3=X(1)+k3(1)*(X(2)-X(1));
y_3=Y(1)+k3(2)*(Y(2)-Y(1));
text(double(x_3),double(y_3),'(a)','color','k','fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
load('.\data\BFT_wind2_daily_7920.mat')
ue=u;
uc=mean(u);
stdc=std(u);
data1=ue-uc;
data2=ue-uc-stdc;
data3=ue-uc-2*stdc;
data1(data1<0)=nan;data1(~isnan(data1))=1;
data2(data2<0)=nan;data2(~isnan(data2))=1;
data3(data3<0)=nan;data3(~isnan(data3))=1;
mon=[31 28 31 30 31 30 31 31 30 31 30 31];
mona=[1,32,60,91,121,152,182,213,244,274,305,335];
monb=[31,59,90,120,151,181,212,243,273,304,334,365];
barmap1=[190 223 235;0 190 255;26 26 210]./255;
for mm=1:12
datam=nansum(data1(mona(mm):monb(mm),:),1);
datas1=nansum(data2(mona(mm):monb(mm),:),1);
datas2=nansum(data3(mona(mm):monb(mm),:),1);
datad0(mm,:)=datam-datas1;
datad1(mm,:)=datas1-datas2;
datad2(mm,:)=datas2;
end
data590=nansum(datad0(5:9,:),1);
data591=nansum(datad1(5:9,:),1);
data592=nansum(datad2(5:9,:),1);
axes('position',[x_0+d_x*px(2), y_0+d_y*py(2), len, width]);
ch=bar([data590;data591;data592]','stacked');
grid on
set(ch(1),'FaceColor',barmap1(1,:));
set(ch(2),'FaceColor',barmap1(2,:));
set(ch(3),'FaceColor',barmap1(3,:));
set(gca,'linewidth',lind,'ylim',[0 120]);
ylabel('Days yr^{-1}','fontname','Times New Roman','FontSize',siz-10,'fontweight','bold');
text(15,110,{'>0\sigma days'},'color',barmap1(1,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,100,{'≥1\sigma days'},'color',barmap1(2,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,90,{'≥2\sigma days'},'color',barmap1(3,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
set(gca,'xlim',[0 43],'xtick',[1:5:42],'xticklabel','','Fontname',...
'Times New Roman','FontSize',siz-10,'fontweight','bold')
hh=get(gca);
X=hh.XLim;
Y=hh.YLim;
k1=[0.03 0.8];
k2=[0.3 0.8];
k3=[0.03 0.9];
x_2=X(1)+k2(1)*(X(2)-X(1));
y_2=Y(1)+k2(2)*(Y(2)-Y(1));
x_3=X(1)+k3(1)*(X(2)-X(1));
y_3=Y(1)+k3(2)*(Y(2)-Y(1));
text(double(x_3),double(y_3),'(b)','color','k','fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
load('.\data\BFT_wind2_daily_7920.mat')
ue=u;
uc=mean(u);
stdc=std(u);
data1=ue-uc;
data2=ue-uc-stdc;
data3=ue-uc-2*stdc;
data1(data1<0)=nan;data1(~isnan(data1))=1;
data2(data2<0)=nan;data2(~isnan(data2))=1;
data3(data3<0)=nan;data3(~isnan(data3))=1;
barmap2=[178 38 38;255 27 255;221 159 221;255 195 204;187 143 143]./255;
for yy=1:42
for mm=1:12
events3=0;
for aa=mona(mm):monb(mm)-2
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy))
events3=events3+1;
end
Events3(mm,yy)=events3;
end
events4=0;
for aa=mona(mm):monb(mm)-3
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy)) & ~isnan(data2(aa+3,yy))
events4=events4+1;
end
Events4(mm,yy)=events4;
end
events5=0;
for aa=mona(mm):monb(mm)-4
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy)) & ~isnan(data2(aa+3,yy))...
& ~isnan(data2(aa+4,yy))
events5=events5+1;
end
Events5(mm,yy)=events5;
end
events6=0;
for aa=mona(mm):monb(mm)-5
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy)) & ~isnan(data2(aa+3,yy))...
& ~isnan(data2(aa+4,yy)) & ~isnan(data2(aa+5,yy))
events6=events6+1;
end
Events6(mm,yy)=events6;
end
events7=0;
for aa=mona(mm):monb(mm)-6
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy)) & ~isnan(data2(aa+3,yy))...
& ~isnan(data2(aa+4,yy)) & ~isnan(data2(aa+5,yy)) & ~isnan(data2(aa+6,yy))
events7=events7+1;
end
Events7(mm,yy)=events7;
end
end
end
datas3=nansum(Events3(5:9,:),1);
datas4=nansum(Events4(5:9,:),1);
datas5=nansum(Events5(5:9,:),1);
datas6=nansum(Events6(5:9,:),1);
datas7=nansum(Events7(5:9,:),1);
axes('position',[x_0+d_x*px(3), y_0+d_y*py(3), len, width]);
ch=bar([datas3;datas4;datas5;datas6;datas7]','stacked');
grid on
set(ch(1),'FaceColor',barmap2(1,:));
set(ch(2),'FaceColor',barmap2(2,:));
set(ch(3),'FaceColor',barmap2(3,:));
set(ch(4),'FaceColor',barmap2(4,:));
set(ch(5),'FaceColor',barmap2(5,:));
set(gca,'linewidth',lind,'ylim',[0 60]);
ylabel('Events yr^{-1}','fontname','Times New Roman','FontSize',siz-10,'fontweight','bold');
text(15,58,{'3-days events'},'color',barmap2(1,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,53,{'4-days events'},'color',barmap2(2,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,48,{'5-days events'},'color',barmap2(3,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,43,{'6-days events'},'color',barmap2(4,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,38,{'7-days events'},'color',barmap2(5,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
set(gca,'xlim',[0 43],'xtick',[1:5:42],'xticklabel',1979:5:2020,'Fontname',...
'Times New Roman','FontSize',siz-10,'fontweight','bold')
hh=get(gca);
X=hh.XLim;
Y=hh.YLim;
k1=[0.03 0.8];
k2=[0.3 0.8];
k3=[0.03 0.9];
x_2=X(1)+k2(1)*(X(2)-X(1));
y_2=Y(1)+k2(2)*(Y(2)-Y(1));
x_3=X(1)+k3(1)*(X(2)-X(1));
y_3=Y(1)+k3(2)*(Y(2)-Y(1));
text(double(x_3),double(y_3),'(c)','color','k','fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);

- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
- 124
- 125
- 126
- 127
- 128
- 129
- 130
- 131
- 132
- 133
- 134
- 135
- 136
- 137
- 138
- 139
- 140
- 141
- 142
- 143
- 144
- 145
- 146
- 147
- 148
- 149
- 150
- 151
- 152
- 153
- 154
- 155
- 156
- 157
- 158
- 159
- 160
- 161
- 162
- 163
- 164
- 165
- 166
- 167
- 168
- 169
- 170
- 171
- 172
- 173
- 174
- 175
- 176
- 177
- 178
- 179
- 180
- 181
- 182
- 183
- 184
- 185
- 186
- 187
- 188
- 189
- 190
- 191
- 192
- 193
- 194
- 195
- 196
- 197
- 198
- 199
- 200
- 201
- 202
- 203
- 204
- 205
- 206
- 207
- 208
- 209
- 210
结果展示
