國立政治大學 ‧ National Chengchi University
附錄 C: 方法一使用之程式碼
find_flu_shot.py
"""Collect flu-shot records (ICD-9 code V048) from the yearly Cohort files.

For every year 1998-2012 the 25 Cohort ``.DAT`` files are scanned line by
line.  A line counts as a flu-shot record when any of the three diagnosis
columns reported by ``ICD9_YEAR(year)`` starts with ``V048``.  The result for
each year is pickled to ``flu_shot_<year>.pkl`` as::

    {ID: [[date, 'V048', age], ...]}

where ``age`` is whatever ``Age(birthday, date)`` from ``NHI_toolbox``
returns.  The syntax is kept valid for the Python 2 interpreter this script
was written for (single-argument ``print(...)`` prints the same text there).
"""
import pickle

SHOT_CODE = 'V048'


def cohort_files(year, root='/home/ym/NHI/Cohort'):
    """Return the 25 Cohort file paths (groups R201 .. R225) for *year*."""
    paths = []
    for group in range(1, 26):
        stem = 'R2%02d' % group  # R201..R209, R210..R225
        paths.append('%s/%s_%d/%s_CD%d.DAT' % (root, stem, year, stem, year))
    return paths


def collect_shots(lines, offsets, age_of, records=None):
    """Add every flu-shot line in *lines* to *records* and return it.

    lines    -- iterable of record strings (e.g. an open file)
    offsets  -- start columns of the diagnosis-code fields to inspect
    age_of   -- callable ``age_of(birthday, date)``; its result is stored as-is
    records  -- dict to update in place; a new dict is created when omitted
    """
    if records is None:
        records = {}
    for line in lines:
        if any(line[start:start + 4] == SHOT_CODE for start in offsets):
            # Column positions below are the ones hard-coded by the
            # original script: date 67-74, birthday 83-90, ID 91-122.
            date = line[67:75]
            age = age_of(line[83:91], date)
            records.setdefault(line[91:123], []).append([date, SHOT_CODE, age])
    return records


def main():
    """Scan all years and write one ``flu_shot_<year>.pkl`` per year."""
    # Project-local helpers; imported here so the pure helpers above can be
    # imported (and tested) without the toolbox being installed.
    from NHI_toolbox import ICD9_YEAR, Age

    for year in range(1998, 2013):
        offsets = [ICD9_YEAR(year)[k][0] for k in range(3)]
        shots = {}
        for path in cohort_files(year):
            print(path)
            # ``with`` closes each input file; the original leaked them.
            with open(path, 'r') as records:
                collect_shots(records, offsets, Age, shots)
        print('year:  %s has  %s cases' % (year, len(shots)))
        # pickle streams must be written to a file opened in binary mode.
        with open('flu_shot_%d.pkl' % year, 'wb') as out:
            pickle.dump(shots, out)


if __name__ == '__main__':
    main()
‧
"""Collect records carrying a flu-related diagnosis code from the Cohort files.

For every year 1998-2012 the 25 Cohort ``.DAT`` files are scanned line by
line.  A line is kept when the first three characters of any of the three
diagnosis columns reported by ``ICD9_YEAR(year)`` are in ``flu_Set``.  The
result for each year is pickled to ``flu_seak_<year>.pkl`` as::

    {ID: [(date, age), ...]}

where ``age`` is whatever ``Age(birthday, date)`` from ``NHI_toolbox``
returns.  The syntax is kept valid for the Python 2 interpreter this script
was written for (single-argument ``print(...)`` prints the same text there).
"""
import pickle

# Three-character code prefixes this script searches for.
flu_Set = set(['480', '482', '487', '460'])


def cohort_files(year, root='/home/ym/NHI/Cohort'):
    """Return the 25 Cohort file paths (groups R201 .. R225) for *year*."""
    paths = []
    for group in range(1, 26):
        stem = 'R2%02d' % group  # R201..R209, R210..R225
        paths.append('%s/%s_%d/%s_CD%d.DAT' % (root, stem, year, stem, year))
    return paths


def collect_visits(lines, offsets, age_of, records=None, codes=flu_Set):
    """Add every line of *lines* with a matching code to *records*; return it.

    lines    -- iterable of record strings (e.g. an open file)
    offsets  -- start columns of the diagnosis-code fields to inspect
    age_of   -- callable ``age_of(birthday, date)``; its result is stored as-is
    records  -- dict to update in place; a new dict is created when omitted
    codes    -- container of three-character code prefixes to match
    """
    if records is None:
        records = {}
    for line in lines:
        # Direct membership test; equivalent to the original's per-line
        # set-building plus intersection, without the throwaway set.
        if any(line[start:start + 3] in codes for start in offsets):
            # Column positions below are the ones hard-coded by the
            # original script: date 67-74, birthday 83-90, ID 91-122.
            date = line[67:75]
            age = age_of(line[83:91], date)
            records.setdefault(line[91:123], []).append((date, age))
    return records


def main():
    """Scan all years and write one ``flu_seak_<year>.pkl`` per year."""
    # Project-local helpers; imported here so the pure helpers above can be
    # imported (and tested) without the toolbox being installed.
    from NHI_toolbox import ICD9_YEAR, Age

    for year in range(1998, 2013):
        offsets = [ICD9_YEAR(year)[k][0] for k in range(3)]
        visits = {}
        for path in cohort_files(year):
            print(path)
            # ``with`` closes each input file; the original leaked them.
            with open(path, 'r') as records:
                collect_visits(records, offsets, Age, visits)
        print('year:  %s has  %s cases' % (year, len(visits)))
        # pickle streams must be written to a file opened in binary mode.
        with open('flu_seak_%d.pkl' % year, 'wb') as out:
            pickle.dump(visits, out)


if __name__ == '__main__':
    main()
population.py
# population.py (as extracted): for each year 1998-2012 it loads pickled
# dicts and prints the sizes of child / adult / old ID sets, first for the
# flu_shot data (c, a, o), then for a second file (c1, a1, o1), and finally
# for the second sets minus the first (c2, a2, o2).
# NOTE(review): this listing comes from a PDF extraction. Indentation is
# lost, several statements are fused onto one line, and some statements are
# missing, so the code does not run as shown.
import pickle
for i in range(1998,2013):
filename = ’flu_shot_’+str(i)+’.pkl’
# NOTE(review): statements appear to be missing here. `fid` is read below
# but never opened in the visible text, and `filename2` (opened further
# down) is never assigned; presumably it names the matching flu_seak
# pickle, given the 'Seak_' labels printed later. Verify.
‧
A = pickle.load(fid) fid.close()
# NOTE(review): the loop that fills the sets c, a and o is not visible.
print ’Shot_child = ’, len(c) print ’Shot_adult = ’, len(a) print ’Shot_old = ’, len(o)
# A is rebound to the contents of the second pickle from here on.
fid = open(filename2,’r’) A = pickle.load(fid)
fid.close()
# The next five lines are page-watermark text from the PDF, not code.
‧ 國
立 政 治 大 學
‧
N a tio na
l C h engchi U ni ve rs it y
# NOTE(review): the head of this if/elif/else chain (creation of c1, a1,
# o1, the `for idx in ...` loop and the first `if` branch that presumably
# fills o1) is missing. What remains: a first-record age component below 6
# goes to c1, everything else reaching the `else` goes to a1.
elif A[idx][0][1][0]<6:
c1.add(idx) else:
a1.add(idx)
print ’Seak_child = ’, len(c1) print ’Seak_adult = ’, len(a1) print ’Seak_old = ’, len(o1)
# IDs in the second data set that are absent from the flu_shot sets.
c2 = c1.difference(c) a2 = a1.difference(a) o2 = o1.difference(o)
print ’Seak_noshot_child = ’, len(c2) print ’Seak_noshot_adult = ’, len(a2) print ’Seak_noshot_old = ’, len(o2)
print ’=====================================’
people_state.py
# people_state.py (as extracted): for the single index year produced by
# range(2011,2012) it loads the flu_shot pickle of that year and the
# flu_seak pickles of that year and the next, accumulates per-person counts
# into 4-row NumPy arrays for three age groups, prints the row means for the
# shot and no-shot groups, and runs stats.ttest_ind(..., equal_var=False)
# between the two groups for each row.
# NOTE(review): this listing comes from a PDF extraction. Indentation is
# lost, several statements are fused onto one line, and statements are
# missing at the `‧` markers, so the code does not run as shown.
import pickle
import numpy as np import calendar
from scipy import stats from NHI_toolbox import *
All_stat = list() All_record = list()
# NOTE(review): `mean_record` and `W_All_record` are appended to further
# down but no initialisation is visible; presumably lost at this gap.
‧
W_mean_record = list() P_value = list()
for i in range(2011,2012):
filename1 = ’/home/glophy/NHI/YOYO/flu_shot_’+str(i)+’.pkl’
filename2 = ’/home/glophy/NHI/YOYO/flu_seak_’+str(i)+’.pkl’
filename3 = ’/home/glophy/NHI/YOYO/flu_seak_’+str(i+1)+’.pkl’
# NOTE(review): fid2.close() appears twice (end of the next line and start
# of the one after) while fid1 is never closed; the first was presumably
# meant to be fid1.close().
fid1 = open(filename1,’r’) fid2 = open(filename2,’r’) fid3 = open(filename3,’r’) A1 = pickle.load(fid1) B1 = pickle.load(fid2) B2 = pickle.load(fid3) fid2.close()
fid2.close() fid3.close()
Shot_people = A1.keys()
# n = number of IDs with a shot record; m = IDs in B1 that are not in A1.
Seak_people1 = set(B1.keys()) Seak_people2 = set(B2.keys()) n = len(Shot_people)
m = len(set(Seak_people1).difference(set(Shot_people))) print ’In year ’,i
print ’There are ’, n, ’ peoples who have flu shot’
print ’There are ’, m, ’ peoples who seak without flu shot’
print ’*********************************’
All_stat.append((n,m))
# NOTE(review): on the next line the assignment `Rc = np.zeros([4,n])` is
# fused into the comment, so as shown it is commented out.
# Rc for child, Rm for middle age, Ro for old man Rc = np.zeros([4,n])
‧
Rm = np.zeros([4,n]) Ro = np.zeros([4,n]) t=0
# Shot group. Column t of Rc/Rm/Ro belongs to one person. The group is
# chosen from the first component of the age stored with the first shot
# record: <6 -> Rc, >=60 -> Ro, otherwise Rm.
# NOTE(review): find_dead_flu.py uses >=65 for the old group; confirm the
# difference is intended.
# For each B1/B2 record, Age(shot_day, seak_day) is computed and
# j = period[1]/3 selects a row; `X[j:,t] += 1` increments rows j..3, so
# each row is a running total over the rows before it. period[1] is
# presumably elapsed months, given the '3*(k+1) month' labels printed below.
# Verify against NHI_toolbox.Age.
# NOTE(review): j is used as an index, so `period[1]/3` relies on Python 2
# integer division.
# NOTE(review): no `t+=1` is visible for this loop (one is visible for the
# no-shot loop further down); presumably lost in extraction.
for ID in Shot_people:
shot_day = A1[ID][0][0]
if ID in Seak_people1:
if A1[ID][0][2][0]<6:
for seakdata in B1[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]>0):
j = period[1]/3 Rc[j:,t] += 1
elif A1[ID][0][2][0]>=60:
for seakdata in B1[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]>0):
j = period[1]/3 Ro[j:,t] += 1 else:
for seakdata in B1[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]>0):
j = period[1]/3 Rm[j:,t] += 1
# NOTE(review): the head of the following-year (B2) branch is missing here.
# By analogy with the no-shot section below it presumably reads
# `if ID in Seak_people2:` followed by `if A1[ID][0][2][0]<6:`. Verify.
‧
for seakdata in B2[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]<12):
j = period[1]/3 Rc[j:,t] += 1
elif A1[ID][0][2][0]>=60:
for seakdata in B2[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]<12):
j = period[1]/3 Ro[j:,t] += 1 else:
for seakdata in B2[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]<12):
j = period[1]/3 Rm[j:,t] += 1
# Row means per age group; ra is the unweighted mean of the three group
# means.
# NOTE(review): the creation of rc, rm, ro and ra is not visible.
for k in range(4):
‧
rc[k] = np.mean(Rc[k,:]) rm[k] = np.mean(Rm[k,:]) ro[k] = np.mean(Ro[k,:])
ra[k] = float(rc[k]+rm[k]+ro[k])/3 mean_record.append([rc,rm,ro,ra]) All_record.append([Rc,Rm,Ro])
# NOTE(review): on the next line `Shot_people = set(A1.keys())` is fused
# into the comment, so as shown it is commented out.
#compute the seak people without flu shot Shot_people = set(A1.keys())
Seak_people1 = set(B1.keys())
Seak_people1 = list(Seak_people1.difference(Shot_people)) Seak_people2 = set(B2.keys())
# No-shot group: same accumulation as above, except that the reference day
# is fixed at str(i)+'1001' for everyone and the age comes from the first
# B1 record of the person (B1[ID][0][1][0]).
m = len(Seak_people1) WRc = np.zeros([4,m]) WRm = np.zeros([4,m]) WRo = np.zeros([4,m]) shot_day = str(i)+’1001’
t=0
for ID in Seak_people1:
if B1[ID][0][1][0]<6:
for seakdata in B1[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]>0):
j = period[1]/3 WRc[j:,t] += 1
# NOTE(review): a branch head is missing here; presumably
# `elif B1[ID][0][1][0]>=60:`, matching the B2 section below. Verify.
‧
for seakdata in B1[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]>0):
j = period[1]/3 WRo[j:,t] += 1 else:
for seakdata in B1[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]>0):
j = period[1]/3 WRm[j:,t] += 1
if ID in Seak_people2:
if B1[ID][0][1][0]<6:
for seakdata in B2[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]<12):
j = period[1]/3 WRc[j:,t] += 1
elif B1[ID][0][1][0]>=60:
for seakdata in B2[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]<12):
j = period[1]/3
# The next five lines are page-watermark text from the PDF, not code.
‧ 國
立 政 治 大 學
‧
N a tio na
l C h engchi U ni ve rs it y
WRo[j:,t] += 1 else:
for seakdata in B2[ID]:
seak_day = seakdata[0]
period = Age(shot_day,seak_day) if (period[1]<12):
j = period[1]/3 WRm[j:,t] += 1 t+=1
# wrm, wro and wra start as copies of rc only to obtain 4-element
# containers; every entry is overwritten in the loop below.
wrc = [0 for k in range(4)]
wrm = rc[:]
wro = rc[:]
wra = rc[:]
for k in range(4):
wrc[k] = np.mean(WRc[k,:]) wrm[k] = np.mean(WRm[k,:]) wro[k] = np.mean(WRo[k,:])
wra[k] = float(wrc[k]+wrm[k]+wro[k])/3 W_mean_record.append([wrc,wrm,wro,wra]) W_All_record.append([WRc,WRm,WRo])
# Print shot vs no-shot means side by side for each row.
for k in range(4):
print ’The ’, 3*(k+1), ’ month’
# NOTE(review): a line is missing here; presumably the matching print for
# the child group (rc[k], wrc[k]).
‧
print ’The average times for middle people is ’, rm[k], wrm[k]
print ’The average times for old people is ’, ro[k], wro[k]
print ’...’
# NOTE(review): on the next line `K = np.zeros([3,4])` is fused into the
# comment, so as shown it is commented out. K receives the p-values below
# (rows: child, middle, old; columns: the four rows k).
#stats.ttest_ind(rvs1,rvs2, equal_var = False) K = np.zeros([3,4])
for k in range(4):
# equal_var=False requests the unequal-variance (Welch) form of the test;
# element [1] of each result is the p-value.
pc = stats.ttest_ind(WRc[k,:],Rc[k,:],equal_var = False) pm = stats.ttest_ind(WRm[k,:],Rm[k,:],equal_var = False) po = stats.ttest_ind(WRo[k,:],Ro[k,:],equal_var = False) K[0,k] = pc[1]
K[1,k] = pm[1]
K[2,k] = po[1]
print ’The ’, 3*(k+1), ’ month’
print ’The p-value of Child is ’, pc[1]
print ’The p-value of middle people is ’, pm[1]
print ’The p-value of old people is ’, po[1]
print ’==========================================’
P_value.append(K)
# NOTE(review): only All_stat is dumped in the visible text and no
# fid.close() follows; the listing is presumably truncated here. Pickle
# files should also be opened in binary mode ('wb').
output_file = ’Result.pkl’
fid = open(output_file,’w’) pickle.dump(All_stat,fid)
國立政治大學 ‧ National Chengchi University
find_dead_flu.py
# find_dead_flu.py (as extracted), two passes:
#   1. For each year 1998-2012, scan one HV<year>.DAT file and collect into
#      a set the IDs (columns 0-31) of lines longer than 300 characters
#      whose column 292 is 'Y' or 'M', then pickle the set.
#   2. For each year 1998-2011, load the flu_shot pickle, the flu_dead
#      pickles of that year and the next, and the flu_seak pickle; compute
#      the share of IDs found in the combined flu_dead sets for the shot
#      and no-shot groups, overall and per age group; print them and dump
#      the accumulated lists to dead_stat.pkl.
# NOTE(review): this listing comes from a PDF extraction. Indentation is
# lost, several statements are fused onto one line, and statements are
# missing at the `‧` markers, so the code does not run as shown.
from NHI_toolbox import * import pickle
# NOTE(review): Pathname, Filename, dead_code, icd9 and Birthday are
# assigned but never used in the visible code.
Pathname = list() Filename = list()
dead_code = set(range(480,488))
for i in range(1998,2013):
# (i-1911) presumably converts the year to the ROC (Minguo) calendar used
# in the directory names; the suffix is '02' when that value exceeds 90 and
# '01' otherwise.
# NOTE(review): each `filename = ...` expression is wrapped onto a second
# line starting with `+`; that is a line wrap from extraction, and a plain
# newline there is not valid Python.
if (i-1911)>90:
filename = ’/home/ym/NHI/Heavy/NHIRD-104-074/AN’+str(i-1911)+’02/HV’
+str(i)+’.DAT’
else:
filename = ’/home/ym/NHI/Heavy/NHIRD-104-074/AN’+str(i-1911)+’01/HV’
+str(i)+’.DAT’
icd9 = 32 D = set()
for line in open(filename,’r’):
if len(line)>300:
ID = line[0:32]
Birthday = line[39:47]
# The meaning of the 'Y' / 'M' flags in column 292 is not shown here;
# presumably a death indicator, given the flu_dead_ file names. Verify.
if (line[292] == ’Y’) or (line[292] == ’M’):
D.add(ID)
# NOTE(review): a statement is presumably missing here. As shown,
# `filename` still names the input .DAT file, so open(filename,'w') would
# truncate the source data. The lost line most likely reassigned it to
# 'flu_dead_'+str(i)+'.pkl', the name read in the second pass. Verify
# before running. No File.close() is visible either.
‧
File = open(filename,’w’) pickle.dump(D,File)
# Second pass. A is the shot dict, B and C are the flu_dead sets for year i
# and year i+1, D is the flu_seak dict.
for i in range(1998,2012):
filename = ’flu_shot_’+str(i)+’.pkl’
filename1 = ’flu_dead_’+str(i)+’.pkl’
filename2 = ’flu_dead_’+str(i+1)+’.pkl’
filename3 = ’flu_seak_’+str(i)+’.pkl’
fid = open(filename,’r’) A = pickle.load(fid) fid.close()
fid = open(filename1,’r’) B = pickle.load(fid)
fid.close()
fid = open(filename2,’r’) C = pickle.load(fid)
fid.close()
fid = open(filename3,’r’) D = pickle.load(fid)
# NOTE(review): the fid.close() for this last file is not visible.
‧
# ID_dead is the union of the two flu_dead sets; ID_noshot is the flu_seak
# IDs minus the shot IDs.
# NOTE(review): the two rate assignments below are wrapped after `=` by the
# extraction. They divide by len(ID_shot) / len(ID_noshot) with no
# empty-set guard, and R (like Old, Chi and Adu further down) is never
# initialised in the visible text.
ID_shot = set(A.keys()) ID_noshot = set(D.keys()) ID_dead1 = B
ID_dead2 = C
ID_dead = ID_dead1.union(ID_dead2)
ID_noshot = ID_noshot.difference(ID_shot) shot_dead_rate =
float(len(ID_shot.intersection(ID_dead)))/len(ID_shot)
noshot_dead_rate =
float(len(ID_noshot.intersection(ID_dead)))/len(ID_noshot) R.append((shot_dead_rate,noshot_dead_rate))
# Split the shot group by the first age component of the first shot
# record: >=65 -> O, <6 -> C, otherwise A.
ID_shot = list(ID_shot) ID_shot_C = set()
ID_shot_A = set() ID_shot_O = set() for idx in ID_shot:
if A[idx][0][2][0] >=65:
ID_shot_O.add(idx) elif A[idx][0][2][0] <6:
ID_shot_C.add(idx) else:
ID_shot_A.add(idx) if len(ID_shot_C)==0:
rc = 0
# NOTE(review): an `else:` was presumably lost at this gap; without it the
# `rc = 0` guard above is immediately overwritten by a division by
# len(ID_shot_C). ra and ro have no visible empty-set guard.
‧
rc = float(len(ID_shot_C.intersection(ID_dead)))/len(ID_shot_C) ra = float(len(ID_shot_A.intersection(ID_dead)))/len(ID_shot_A) ro = float(len(ID_shot_O.intersection(ID_dead)))/len(ID_shot_O)
# Same split for the no-shot group, using the age stored with the first
# flu_seak record (D[idx][0][1][0]).
ID_noshot = list(ID_noshot) ID_noshot_C = set()
ID_noshot_A = set() ID_noshot_O = set() for idx in ID_noshot:
if D[idx][0][1][0]>=65:
# NOTE(review): the rest of this classification chain (the add() calls and
# the elif/else branches) is missing between the line above and the line
# below.
rcn = float(len(ID_noshot_C.intersection(ID_dead)))/len(ID_noshot_C) ran = float(len(ID_noshot_A.intersection(ID_dead)))/len(ID_noshot_A) ron = float(len(ID_noshot_O.intersection(ID_dead)))/len(ID_noshot_O)
Old.append((ro,ron)) Chi.append((rc,rcn)) Adu.append((ra,ran))
# NOTE(review): the next line is two print statements fused together.
print ’In year ’, i, print ’...’
# The next five lines are page-watermark text from the PDF, not code.
‧ 國
立 政 治 大 學
‧
N a tio na
l C h engchi U ni ve rs it y
print ’the rate of shot_people_dead is’,
(shot_dead_rate)*100,’%’
print ’the rate of non_shot_people_dead is’, (noshot_dead_rate)*100,’%’
print ’rate of shot_child_dead is ’, rc*100,’%’
print ’rate of noshot_child_dead is ’,rcn*100,’%’
print ’rate of shot_adult_dead is ’, ra*100,’%’
print ’rate of noshot_adult_dead is ’, ran*100,’%’
print ’rate of shot_old_dead is ’, ro*100,’%’
print ’rate of noshot_old_dead is ’, ron*100,’%’
print ’=======================’
# Four objects are pickled back to back into one file, so a reader must
# call pickle.load four times in the same order (R, Chi, Adu, Old).
# NOTE(review): pickle files should be opened in binary mode ('wb').
fid = open(’dead_stat.pkl’,’w’) pickle.dump(R,fid)
pickle.dump(Chi,fid) pickle.dump(Adu,fid) pickle.dump(Old,fid) fid.close()