############################################################################################################## ## ## This is R code to compute trends in Canadian monthly avg temperature highs ## Download data from http://ec.gc.ca/dccha-ahccd/default.asp?lang=en&n=70E82601-1 ## Unzip into the same relative folder as this code file. It should produce a folder called ## Homog_monthly_max_temp/. The code can figure the rest out. ## ############################################################################################################## # GET STATION NAMES AND CODES # This requires install.packages("stringr", dependencies=TRUE) options(width = 150) rm(list=ls(all=TRUE)) # remove all objects in memory require(stringr) # will use to trim white space on strings require(plotrix) library(lattice) early_cutoff = 1978 # If we want to pick only records going back at least this far start = 1978 # If we only want to use years since this min_endyr = 2015 # Must come up to this year + 1 ax = 1.5 # axis extent graph = 1 # Toggle this to do graphing st = read.csv(file = "Homog_monthly_max_temp/BTemperature_Stations.csv", skip=4, sep=",", stringsAsFactors = FALSE, header=FALSE) # ORGANIZE STATION VARIABLES station = str_trim(st$V2) province = st$V1 id = str_trim(st$V3) fromyr = as.numeric(st$V4) frommo = as.numeric(st$V5) endyr = as.numeric(st$V6) endmo = as.numeric(st$V7) lat = as.numeric(st$V8) lon = as.numeric(st$V9) elevation = as.numeric(st$V10) joined = str_trim(st$V11) K = length(id) # K = Number of stations rm(st) # remove V1-V10 # MAKE MONTH NAMES mn = c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec","Annual") # REPORT MATRIX Variables - 1st entries r_prov = "BC" r_stn = "wherever" r_lat = 0 r_lon = 0 r_elev = 0 r_earliest = 0 r_start = 0 r_month = "Mon" r_mnum = 0 r_trend = 0 r_pm = "C/decade +/- " r_ci = 0 r_code="something" keep = rep(0, times=13*K) # make indicator for non-zero entry # LOOP TO READ TEMPERATURE DATA BY STATION & COMPUTE TRENDS # START OF MAIN LOOP sink("logfiles/records.txt") cat("SUMMARY OF RECORDS BEGINNING AS OF", early_cutoff, "\n") for (i in 1:K) { if ((fromyr[i]min_endyr)) { # Record is long enough to use vf=matrix(0,nrow=1,ncol=13) # empty row matrices for vf, ci and sg entries ci=matrix(0,nrow=1,ncol=13) # sg=matrix(0,nrow=1,ncol=13) all=matrix(0,nrow=13,ncol=6) # READ A SINGLE LOCATION, DEAL WITH NA's AND ORGANIZE MONTHLY DATA fn=paste("Homog_monthly_max_temp/mx",id[i],".txt", sep="") # File name for read command st = read.csv(file = fn, skip=4, stringsAsFactors = FALSE, header=FALSE) st = subset(st,st$V1>start) # drop years prior to 'start' year = as.numeric(st$V1) Jan=ts(st$V2) # create monthly records as time series Feb=ts(st$V4) Mar=ts(st$V6) Apr=ts(st$V8) May=ts(st$V10) Jun=ts(st$V12) Jul=ts(st$V14) Aug=ts(st$V16) Sep=ts(st$V18) Oct=ts(st$V20) Nov=ts(st$V22) Dec=ts(st$V24) T = length(Jan) # length of sample t=c(1:T) # simple trend cat(" ID ", id[i] , station[i], " earliest year is ", fromyr[i], ", sample length = ", T, " years","\n") Jan[Jan==-9999.9]=NA # encode missing data Feb[Feb==-9999.9]=NA Mar[Mar==-9999.9]=NA Apr[Apr==-9999.9]=NA May[May==-9999.9]=NA Jun[Jun==-9999.9]=NA Jul[Jul==-9999.9]=NA Aug[Aug==-9999.9]=NA Sep[Sep==-9999.9]=NA Oct[Oct==-9999.9]=NA Nov[Nov==-9999.9]=NA Dec[Dec==-9999.9]=NA # use Nov and leading Jan # INTERPOLATE MISSING MONTHLY VALUES aa=cbind(Feb,lag(Dec),lag(Jan,-1)) # need lags and leads aa=aa[2:(T+1),] Jan[is.na(Jan)]=(aa[is.na(Jan),1]+aa[is.na(Jan),3])/2 # use Feb and lagged Dec Feb[is.na(Feb)]=(Jan[is.na(Feb)]+Mar[is.na(Feb)])/2 Mar[is.na(Mar)]=(Feb[is.na(Mar)]+Apr[is.na(Mar)])/2 Apr[is.na(Apr)]=(Mar[is.na(Apr)]+May[is.na(Apr)])/2 May[is.na(May)]=(Apr[is.na(May)]+Jun[is.na(May)])/2 Jun[is.na(Jun)]=(May[is.na(Jun)]+Jul[is.na(Jun)])/2 Jul[is.na(Jul)]=(Jun[is.na(Jul)]+Aug[is.na(Jul)])/2 Aug[is.na(Aug)]=(Jul[is.na(Aug)]+Sep[is.na(Aug)])/2 Sep[is.na(Sep)]=(Aug[is.na(Sep)]+Oct[is.na(Sep)])/2 Oct[is.na(Oct)]=(Sep[is.na(Oct)]+Nov[is.na(Oct)])/2 Nov[is.na(Nov)]=(Oct[is.na(Nov)]+Dec[is.na(Nov)])/2 Dec[is.na(Dec)]=(Nov[is.na(Dec)]+aa[is.na(Dec),3])/2 # use Nov and leading Jan # IF THERE ARE 2 OR MORE NAs IN SEQUENCE SET TO SAMPLE MEAN Jan[is.na(Jan)]=mean(!is.na(Jan)) Feb[is.na(Feb)]=mean(!is.na(Feb)) Mar[is.na(Mar)]=mean(!is.na(Mar)) Apr[is.na(Apr)]=mean(!is.na(Apr)) May[is.na(May)]=mean(!is.na(May)) Jun[is.na(Jun)]=mean(!is.na(Jun)) Jul[is.na(Jul)]=mean(!is.na(Jul)) Aug[is.na(Aug)]=mean(!is.na(Aug)) Sep[is.na(Sep)]=mean(!is.na(Sep)) Oct[is.na(Oct)]=mean(!is.na(Oct)) Nov[is.na(Nov)]=mean(!is.na(Nov)) Dec[is.na(Dec)]=mean(!is.na(Dec)) Ann=(Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec)/12 # Annual average # MATRIX OF MONTHLY MAX VALUES PUT ON DECADAL SCALE mmx=10*matrix( c(Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec, Ann), nrow=T, ncol=13 ) rm(st) # Tidy up work space # ROUTINE TO GENERATE HAC STANDARD ERRORS ON TRENDS beta1 = lm(t ~ 1) # partial out the t-terms ttilde1 = as.matrix(beta1$residuals) eta1 = sum( ttilde1^2 ) # partialling out term (aka inv(X'X) ) d = c(1, rep( 0 , times=T-1)) # create T-length vector = 1 followed by T-1 zeroes b = lm(mmx~year) # OLS trend regressions by month b_t = as.matrix(b$coefficients[2,]) # get slope coeffs r_t = as.matrix(b$residuals[,]) # Get residuals # DO THIS ONE MONTH AT A TIME for (j in 1:13) { allrow = (i-1)*13+j # row number of 'all' data frame keep[allrow]=1 # indicate that we will keep this row d = c(1, rep(0, times=(T-1) )) # create T-length vector = 1 followed by T-1 zeroes # VF TEST MATRIX u = t(r_t[,j]) # transpose resid vector S= u[1,1] # S = first col of u (= single entry since u is a vector) OMEGA = S %*% t(S) # First accum entry for Omega matrix for (om in 2:T) { # Loop to accumulate other Omega=SS' terms d[om]=1 # Vector to add 1st i cols of u (sets next element to 1) S = u %*% d # forms Nx1 vector with sum of 1st i cols of u OMEGA = OMEGA + S %*% t(S) # accumulates partial sums } OMEGA = 2*OMEGA / (T^2) # Last step # EXTRACT MONTHLY TREND CI's & VF SCORES + TREND DESCRIPTOR VF = eta1 * (b_t[j,1]) %*% (solve(OMEGA) ) %*% (b_t[j,1]) ci[1,j] = 6.482*abs( b_t[j,1]/chol(VF) ) # Confidence interval width vf[1,j] = VF # VF score code = " " if( (b_t[j,1]-ci[1,j]) >0) {code = " Warming"} else { if( (b_t[j,1]+ci[1,j]) <0) {code = " Cooling"} else {code=" No significant trend"} } # STORE REPORT OUTPUTS r_stn[allrow] = station[i] r_prov[allrow] = province[i] r_lat[allrow] = lat[i] r_lon[allrow] = lon[i] r_elev[allrow] = elevation[i] r_earliest[allrow] = fromyr[i] r_start[allrow] = start+1 r_month[allrow] = mn[j] r_mnum[allrow] = j r_trend[allrow] = round(b_t[j,1],3) r_pm[allrow] = "C/decade +/-" r_ci[allrow] = round(ci[1,j],3) r_code[allrow]=code } # month iteration # END OF MAIN LOOP } # End of 'if length' of record statement } # station iteration sink(file=NULL) # DATA SUMMARY MATRICES # EVERYTHING INCLUDING BLANK ROWS all_all = data.frame(r_stn, r_prov, r_mnum, r_lat, r_lon, r_elev, r_earliest, r_start, r_month, r_trend, r_pm, r_ci, r_code) # JUST ROWS SELECTED BY DATE keep_all = all_all[keep==1,] # Selected interval warming = as.numeric((keep_all$r_trend - keep_all$r_ci) > 0) # Indicator for significant warming cooling = as.numeric((keep_all$r_trend + keep_all$r_ci) < 0) # Indicator for significant cooling alldata = data.matrix(data.frame( keep_all$r_lat, keep_all$r_lon, keep_all$r_elev, keep_all$r_earliest, keep_all$r_start, keep_all$r_mnum, keep_all$r_trend, keep_all$r_ci, warming, cooling)) rownames(alldata) = paste(keep_all$r_prov,keep_all$r_stn) # Write data to CSV file write.csv(keep_all, file="logfiles/allcities.csv") # WRITE REPORT FILE AND CONSTRUCT TREND/CI GRAPHABLES nstat = nrow(keep_all)/13 trend_by_month=rep(0, times=12) ci_by_month=rep(0, times=12) sink("logfiles/DataSummary.txt") cat("===========================================================================================================================","\n") cat("Canada-wide summary using",nstat,"stations with data back to", early_cutoff, "selecting data after", start, "\n") for (j in 1:13) { # ONE MONTH AT A TIME sel = subset(alldata, alldata[,6]==j) # Select data for this month trend_by_month[j]=round(mean(sel[,7]),2) # Select mean trend coeff ci_by_month[j] = 2*sd(sel[,7]) # Select 2-sd wr_m = round(mean(sel[,9]),3) cl_m = round(mean(sel[,10]),3) cat("\n",mn[j],"Summary: avg trend =", trend_by_month[j], "median =", round(median(sel[,7]),2), "C/decade; fraction warming =", wr_m, "; fraction cooling =", cl_m) } cat("\n","\n") cat("===========================================================================================================================","\n") print(keep_all) sink(file=NULL) # GRAPHING SUBCOMPONENT if (graph>0) { monthnum=alldata[,6] b_jan=alldata[monthnum==1,7] # get vector of trends for each month b_feb=alldata[monthnum==2,7] b_mar=alldata[monthnum==3,7] b_apr=alldata[monthnum==4,7] b_may=alldata[monthnum==5,7] b_jun=alldata[monthnum==6,7] b_jul=alldata[monthnum==7,7] b_aug=alldata[monthnum==8,7] b_sep=alldata[monthnum==9,7] b_oct=alldata[monthnum==10,7] b_nov=alldata[monthnum==11,7] b_dec=alldata[monthnum==12,7] b_ann=alldata[monthnum==13,7] b_all=data.frame(b_jan,b_feb,b_mar,b_apr,b_may,b_jun,b_jul,b_aug,b_sep,b_oct,b_nov,b_dec, b_ann) latitude=alldata[monthnum==13,1] longitude=alldata[monthnum==13,2] elevation=alldata[monthnum==13,3] earliest=alldata[monthnum==13,4] since = alldata[monthnum==13,5] sb_all = stack(b_all[,1:12]) # stack monthly trends for xyplot sb_all$latitude = rep(latitude, 12) sb_all$rname = rep(mn[1:12], each=nstat) names(sb_all)=c("Trend", "Var", "Latitude", "Month") cat("\n","Start year is", since[1], "\n") cat("Number of stations:", nstat, "\n") abc = rep(c("(a)","(b)","(c)","(d)","(e)","(f)","(g)","(h)","(i)","(j)","(k)","(l)"), each=nstat) mn2=paste(abc, sb_all$Month) print( xyplot(sb_all$Trend ~ sb_all$Latitude | factor(mn2), as.table=TRUE, xlim=c(40,80), ylim=c(-5,5), xlab="Monthly trends by latitude", ylab="deg C/decade", abline = list(h = 0, col = "grey"), type=c("p","smooth"), aspect=.5 ) ) pdf('graphics/temp_month.pdf') par(las=2) boxplot(b_all, outline=FALSE, ylim=c(-0.4*ax,0.8*ax), xaxt="n", cex=1.5, main=c("Trends since",start), ylab="deg C/decade", xlab="") axis(1, 1:13, labels=mn) abline(h=0, lty=1, col="gray") dev.off() pdf('graphics/temp_lat1.pdf') par(mfrow = c(3,1)) par(mar=c(2,5,4,5), xaxt="s", yaxt="s") plot(b_ann ~ latitude, ylim=c(-ax,ax), xlim=c(40,85), xlab="", ylab="deg C/decade", pch=16, col="gray", main=c("Trends since",start)) lines(lowess(latitude, b_ann, f=.5),lwd=2 ) abline(h=0, lty=1, col="gray") text(60,(-ax+.5),"Annual v Latitude", cex=1.4) par(mar=c(3,5,3,5), xaxt="s", yaxt="n") plot(b_ann ~ longitude, ylim=c(-ax,ax), xlim=c(-140,-50), xlab="Longitude", ylab="", pch=16, col="gray") lines(lowess(longitude, b_ann, f=.5),lwd=2 ) abline(h=0, lty=1, col="gray") text(-70,(-ax+.5),"Annual v Longitude", cex=1.4) par(mar=c(4,5,2,5), xaxt="s", yaxt="n") plot(b_ann ~ elevation, ylim=c(-ax,ax), xlim=c(0,1600), xlab="Elevation (m)", ylab="deg C/decade", pch=16, col="gray") lines(lowess(elevation, b_ann, f=.5),lwd=2 ) abline(h=0, lty=1, col="gray") text(1000,(-ax+.5),"Annual v Elevation", cex=1.4) dev.off() colour=rep("red", times=nstat) colour[b_ann < .5]="yellow" colour[b_ann < .1]="gray" colour[b_ann < -.1]="blue" colour[b_ann < -.5]="purple" h=order(b_ann) pdf('graphics/heatmap.pdf') plot(latitude[h]~longitude[h], xlab="Longitude", ylab="Latitude", col=colour, ylim=c(40,85), xlim=c(-140,-50), type="p", pch=16, cex=1.5) legend(-145, 87, legend=c( "< -0.5C", "-0.5 to -0.1C", "-0.1 to +0.1C", "+0.1 to +0.5C", "> +0.5C"), col=c("purple", "blue", "gray", "yellow", "red"), pch=16, cex=1) dev.off() }