1 The age of big data


We are getting more and more data, and this makes it easier to make false disoveries. We consider three main ways for these to arise.

1.1 Randomly (Spurious Correlation)

We begin with a motivating empirical example of “US Gov’t Spending on Science”. Get and inspect some data from https://tylervigen.com/spurious-correlations

## Your data is not made up in the computer (hopefully!)
## will normally be an address on your PC
vigen_csv <- read.csv( paste0(
'https://raw.githubusercontent.com/the-mad-statter/',
'whysospurious/master/data-raw/tylervigen.csv') ) 
class(vigen_csv)
## [1] "data.frame"
names(vigen_csv)
##  [1] "year"                         "science_spending"            
##  [3] "hanging_suicides"             "pool_fall_drownings"         
##  [5] "cage_films"                   "cheese_percap"               
##  [7] "bed_deaths"                   "maine_divorce_rate"          
##  [9] "margarine_percap"             "miss_usa_age"                
## [11] "steam_murders"                "arcade_revenue"              
## [13] "computer_science_doctorates"  "noncom_space_launches"       
## [15] "sociology_doctorates"         "mozzarella_percap"           
## [17] "civil_engineering_doctorates" "fishing_drownings"           
## [19] "kentucky_marriage_rate"       "oil_imports_norway"          
## [21] "chicken_percap"               "train_collision_deaths"      
## [23] "oil_imports_total"            "pool_drownings"              
## [25] "nuclear_power"                "japanese_cars_sold"          
## [27] "motor_vehicle_suicides"       "spelling_bee_word_length"    
## [29] "spider_deaths"                "math_doctorates"             
## [31] "uranium"
vigen_csv[1:5,1:5]
##   year science_spending hanging_suicides pool_fall_drownings cage_films
## 1 1996               NA               NA                  NA         NA
## 2 1997               NA               NA                  NA         NA
## 3 1998               NA               NA                  NA         NA
## 4 1999            18079             5427                 109          2
## 5 2000            18594             5688                 102          2
## similar `apply' functions
lapply(vigen_csv[,1:5], class) ## like apply, but for lists
## $year
## [1] "integer"
## 
## $science_spending
## [1] "integer"
## 
## $hanging_suicides
## [1] "integer"
## 
## $pool_fall_drownings
## [1] "integer"
## 
## $cage_films
## [1] "integer"
sapply(vigen_csv[,1:5], class) ## lapply, formatted to a vector
##                year    science_spending    hanging_suicides pool_fall_drownings 
##           "integer"           "integer"           "integer"           "integer" 
##          cage_films 
##           "integer"

The US government spending on science is ruining cinema (p<.001)!?

## Drop Data before 1999
vigen_csv <- vigen_csv[vigen_csv$year >= 1999,] 

## Run OLS Regression $
reg1 <-  lm(cage_films ~ -1 + science_spending, data=vigen_csv)
summary(reg1)
## 
## Call:
## lm(formula = cage_films ~ -1 + science_spending, data = vigen_csv)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7670 -0.7165  0.1447  0.7890  1.4531 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## science_spending 9.978e-05  1.350e-05    7.39 2.34e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.033 on 10 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.8452, Adjusted R-squared:  0.8297 
## F-statistic: 54.61 on 1 and 10 DF,  p-value: 2.343e-05

It’s not all bad, people in maine stay married longer?

plot.new()
plot.window(xlim=c(1999, 2009), ylim=c(7,9))
lines(log(maine_divorce_rate*1000)~year, data=vigen_csv)
lines(log(science_spending/10)~year, data=vigen_csv, lty=2)
axis(1)
axis(2)
legend('topright', lty=c(1,2), legend=c(
    'log(maine_divorce_rate*1000)',
    'log(science_spending/10)'))

par(mfrow=c(1,2), mar=c(2,2,2,1))
plot.new()
plot.window(xlim=c(1999, 2009), ylim=c(5,9)*1000)
lines(science_spending/3~year, data=vigen_csv, lty=1, col=2, pch=16)
text(2003, 8200, 'US spending on science, space, technology (USD/3)', col=2, cex=.6, srt=30)
lines(hanging_suicides~year, data=vigen_csv, lty=1, col=4, pch=16)
text(2004, 6500, 'US Suicides by hanging, strangulation, suffocation (Deaths)', col=4, cex=.6, srt=30)
axis(1)
axis(2)


plot.new()
plot.window(xlim=c(2002, 2009), ylim=c(0,5))
lines(cage_films~year, data=vigen_csv[vigen_csv$year>=2002,], lty=1, col=2, pch=16)
text(2006, 0.5, 'Number of films with Nicolas Cage (Films)', col=2, cex=.6, srt=0)
lines(pool_fall_drownings/25~year, data=vigen_csv[vigen_csv$year>=2002,], lty=1, col=4, pch=16)
text(2006, 4.5, 'Number of drownings by falling into pool (US Deaths/25)', col=4, cex=.6, srt=0)
axis(1)
axis(2)

## Include an intercept to regression 1
#reg2 <-  lm(cage_films ~ science_spending, data=vigen_csv)
#suppressMessages(library(stargazer))
#stargazer(reg1, reg2, type='html')

We now run IV regressions for different variable combinations in the dataset

library(fixest)
knames <- names(vigen_csv)[2:11] ## First 10 Variables
#knames <- names(vigen_csv)[-1] ## All Variables
p <- 1
ii <- 1
ivreg_list <- vector("list", factorial(length(knames))/factorial(length(knames)-3))

## Choose 3 variable
for( k1 in knames){
for( k2 in setdiff(knames,k1)){
for( k3 in setdiff(knames,c(k1,k2)) ){   
    X1 <- vigen_csv[,k1]
    X2 <- vigen_csv[,k2]
    X3 <- vigen_csv[,k3]
    ## Merge and `Analyze'        
    dat_i <- na.omit(data.frame(X1,X2,X3))
    ivreg_i <- feols(X1~1|X2~X3, data=dat_i)
    ivreg_list[[ii]] <- list(ivreg_i, c(k1,k2,k3))
    ii <- ii+1
}}}
pvals <- sapply(ivreg_list, function(ivreg_i){ivreg_i[[1]]$coeftable[2,4]})

plot(ecdf(pvals), xlab='p-value', ylab='CDF', main='Frequency IV is Statistically Significant')
abline(v=c(.01,.05), col=c(2,4))

## Most Significant Spurious Combinations
pvars <- sapply(ivreg_list, function(ivreg_i){ivreg_i[[2]]})
pdat <- data.frame(t(pvars), pvals)
pdat <- pdat[order(pdat$pvals),]
head(pdat)
##                     X1                 X2            X3        pvals
## 4     science_spending   hanging_suicides    bed_deaths 3.049883e-08
## 76    hanging_suicides   science_spending    bed_deaths 3.049883e-08
## 3     science_spending   hanging_suicides cheese_percap 3.344890e-08
## 75    hanging_suicides   science_spending cheese_percap 3.344890e-08
## 485 maine_divorce_rate   margarine_percap cheese_percap 3.997738e-08
## 557   margarine_percap maine_divorce_rate cheese_percap 3.997738e-08

For more intuition on spurious correlations, try http://shiny.calpoly.sh/Corr_Reg_Game/

1.2 Programming Errors

A huge amount of data normally means a huge amount of data cleaning/merging/aggregating. Some spurious results are driven by errors in this process, so be careful.

## Function to Create Sample Datasets
make_noisy_data <- function(n, b=0){
    x <- seq(1,10, length.out=n)
    e <- rnorm(length(x), mean=0, sd=10)
    y <- b*x + e
    xy_mat <- data.frame(ID=seq(x), x=x, y=y)
    return(xy_mat)
}

## Two sample datasets
dat1 <- make_noisy_data(6)
dat2 <- make_noisy_data(6)

## Merging data in wide format
dat_merged_wide <- merge(dat1, dat2,
    by='ID', suffixes=c('.1','.2'))

## merging data in long format and reshaping to wide
dat_merged_long <- rbind( cbind(dat1,DF=1),cbind(dat2,DF=2))
library(reshape2)
dat_melted <- melt(dat_merged_long, id.vars=c('ID', 'DF'))
dat_merged_wide2 <- dcast(dat_melted, ID~DF+variable)

## CHECK they are the same.
dat_merged_wide == dat_merged_wide2
##        ID  x.1  y.1  x.2  y.2
## [1,] TRUE TRUE TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE TRUE TRUE
## [3,] TRUE TRUE TRUE TRUE TRUE
## [4,] TRUE TRUE TRUE TRUE TRUE
## [5,] TRUE TRUE TRUE TRUE TRUE
## [6,] TRUE TRUE TRUE TRUE TRUE

Nevertheless, data transformation is often necessary before regression analysis. For downloading tips, see https://raw.githubusercontent.com/rstudio/cheatsheets/main/data-import.pdf

1.3 P-Hacking

Another class of errors pertains to P-hacking (and it’s various synonyms)

set.seed(123)
n <- 50
X1 <- runif(n)

## Regression Machine:
## repeatebdly finds covariate, runs regression
## stops when statistically significant at .1\% 

p <- 1
i <- 0
while(p >= .001){ 
    ## Get Random Covariate
    X2 <-  runif(n)
    ## Merge and `Analyze'
    dat_i <- data.frame(X1,X2)
    reg_i <- lm(X1~X2, data=dat_i)
    ## update results in global environment
    p <- summary(reg_i)$coefficients[2,4]
    i <- i+1
}

plot(X1~X2, data=dat_i,
    pch=16, col=grey(.5,.5),
    main=paste0('Random Dataset ', i,": p=", round(p,4)))
abline(reg_i)

#summary(reg_i)
## P-hacking 2SLS
p <- 1
ii <- 0
set.seed(123)
while(p >= .05){
    ## Get Random Covariates
    X2 <-  runif(n)    
    X3 <-  runif(n)
    ## Create Treatment Variable based on Cutoff
    cutoffs <- seq(0,1,length.out=11)[-c(1,11)]
    for(tau in cutoffs){
        T3 <- 1*(X3 > tau)
        ## Merge and `Analyze'
        dat_i <- data.frame(X1,X2,T3)
        ivreg_i <- feols(X1~1|X2~T3, data=dat_i)
        ## Update results in global environment
        ptab <- summary(ivreg_i)$coeftable
        if( nrow(ptab)==2){
            p <- ptab[2,4]
            ii <- ii+1
        }
    }
}
summary(ivreg_i)
## TSLS estimation, Dep. Var.: X1, Endo.: X2, Instr.: T3
## Second stage: Dep. Var.: X1
## Observations: 50 
## Standard-errors: IID 
##              Estimate Std. Error       t value  Pr(>|t|)    
## (Intercept) -9.95e-14   1.28e-13 -7.750700e-01    0.4421    
## fit_X2       1.00e+00   2.46e-13  4.060978e+12 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 5.81e-14   Adj. R2: 1
## F-test (1st stage), X2: stat = 0.664884, p = 0.418869, on 1 and 48 DoF.
##             Wu-Hausman: stat = 0.232185, p = 0.632145, on 1 and 47 DoF.

2 Random Causal Impacts

We simply apply the three major credible methods (IV, RDD, DID) to random walks. We find a result that fits mold and add various extensions that make it appear robust. The analysis sounds scientific, and you could probably be convinced if it were not just random noise.

n <- 1000
n_index <- seq(n)

set.seed(1)
random_walk1 <- cumsum(runif(n,-1,1))

set.seed(2)
random_walk2 <- cumsum(runif(n,-1,1))

par(mfrow=c(1,2))
plot(random_walk1, pch=16, col=rgb(1,0,0,.25),
    xlab='Time', ylab='Random Value')
plot(random_walk2, pch=16, col=rgb(0,0,1,.25),
    xlab='Time', ylab='Random Value')

2.1 IV

## Searching for a valid instrument
p <- 1
ii <- 0
set.seed(3)
while(p >= .001){
    ## Get Random Covariates
    random_walk3 <- cumsum(runif(n,-1,1))
    ## Merge and `Analyze'
    dat_i <- data.frame(
        X1=random_walk1,
        X2=random_walk2,
        X3=random_walk3)
    ivreg_i <- feols(X1~1|X2~X3, data=dat_i)
    ## Update results in global environment
    ptab <- summary(ivreg_i)$coeftable
    if( nrow(ptab)==2){
        p <- ptab[2,4]
        ii <- ii+1
    }
}
summary(ivreg_i)
## TSLS estimation, Dep. Var.: X1, Endo.: X2, Instr.: X3
## Second stage: Dep. Var.: X1
## Observations: 1,000 
## Standard-errors: IID 
##             Estimate Std. Error  t value   Pr(>|t|)    
## (Intercept)  9.03248   0.893912 10.10444  < 2.2e-16 ***
## fit_X2       1.94350   0.251147  7.73850 2.4567e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 6.74813   Adj. R2: -1.66509
## F-test (1st stage), X2: stat =  46.0, p = 2.038e-11, on 1 and 998 DoF.
##             Wu-Hausman: stat = 137.3, p < 2.2e-16  , on 1 and 997 DoF.

2.2 RDD

## Let the data take shape
## (around the large differences before and after)
n1 <- 290
wind1 <- c(n1-300,n1+300)
dat1 <- data.frame(t=n_index, y=random_walk1, d=1*(n_index > n1))
dat1_sub <- dat1[ n_index>wind1[1] & n_index < wind1[2],]

## Then find your big break
reg0 <- lm(y~t, data=dat1_sub[dat1_sub$d==0,])
reg1 <- lm(y~t, data=dat1_sub[dat1_sub$d==1,])

## The evidence should show openly (it's just science)
plot(random_walk1, pch=16, col=rgb(0,0,1,.25),
    xlim=wind1, xlab='Time', ylab='Random Value')
abline(v=n1, lty=2)
lines(reg0$model$t, reg0$fitted.values, col=1)
lines(reg1$model$t, reg1$fitted.values, col=1)

## Dress with some statistics for added credibility
rdd_sub <- lm(y~d+t+d*t, data=dat1_sub)
rdd_full <- lm(y~d+t+d*t, data=dat1)
stargazer::stargazer(rdd_sub, rdd_full, 
    type='html',
    title='Recipe RDD',
    header=F,
    omit=c('Constant'),
    notes=c('First column uses a dataset around the discontinuity.',
    'Smaller windows are more causal, and where the effect is bigger.'))
Recipe RDD
Dependent variable:
y
(1) (2)
d -13.169*** -9.639***
(0.569) (0.527)
t 0.011*** 0.011***
(0.001) (0.002)
d:t 0.009*** 0.004*
(0.002) (0.002)
Observations 589 1,000
R2 0.771 0.447
Adjusted R2 0.770 0.446
Residual Std. Error 1.764 (df = 585) 3.081 (df = 996)
F Statistic 658.281*** (df = 3; 585) 268.763*** (df = 3; 996)
Note: p<0.1; p<0.05; p<0.01
First column uses a dataset around the discontinuity.
Smaller windows are more causal, and where the effect is bigger.

2.3 DID

## Find a reversal of fortune
## (A good story always goes well with a nice pre-trend)
n2 <- 318
wind2 <- c(n2-20,n2+20)
plot(random_walk2, pch=16, col=rgb(0,0,1,.5),
    xlim=wind2, ylim=c(-15,15), xlab='Time', ylab='Random Value')
points(random_walk1, pch=16, col=rgb(1,0,0,.5))
abline(v=n2, lty=2)

## Knead out any effects that are non-causal 
## (or even just correlation)
dat2A <- data.frame(t=n_index, y=random_walk1, d=1*(n_index > n2), RWid=1)
dat2B <- data.frame(t=n_index, y=random_walk2, d=0, RWid=2)
dat2  <- rbind(dat2A, dat2B)
dat2$RWid <- as.factor(dat2$RWid)
dat2$tid <- as.factor(dat2$t)
dat2_sub <- dat2[ dat2$t>wind2[1] & dat2$t < wind2[2],]

## Report the stars for all to enjoy
## (and remind that stable coefficients are the good ones)
did_fe1 <- lm(y~d+tid, data=dat2_sub)
did_fe2 <- lm(y~d+RWid, data=dat2_sub)
did_fe3 <- lm(y~d*RWid+tid, data=dat2_sub)
stargazer::stargazer(did_fe1, did_fe2, did_fe3,
    type='html',
    title='Recipe DID',
    header=F,
    omit=c('tid','RWid', 'Constant'),
    notes=c(
     'Fixed effects for time in column 1, for id in column 2, and both in column 3.',
     'Fixed effects control for most of your concerns.',
     'Anything else creates a bias in the opposite direction.'))
Recipe DID
Dependent variable:
y
(1) (2) (3)
d 1.804* 1.847*** 5.851***
(0.892) (0.652) (0.828)
Observations 78 78 78
R2 0.227 0.164 0.668
Adjusted R2 -0.566 0.142 0.309
Residual Std. Error 2.750 (df = 38) 2.035 (df = 75) 1.827 (df = 37)
F Statistic 0.287 (df = 39; 38) 7.379*** (df = 2; 75) 1.860** (df = 40; 37)
Note: p<0.1; p<0.05; p<0.01
Fixed effects for time in column 1, for id in column 2, and both in column 3.
Fixed effects control for most of your concerns.
Anything else creates a bias in the opposite direction.
LS0tCnRpdGxlOiAiRGF0YSBTY2llbnRpc20iCmF1dGhvcjogIkpvcmRhbiBBZGFtc29uIgpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZC4lbS4lWScpYCIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICAgICAgY29kZV9kb3dubG9hZDogdHJ1ZQogICAgICAgIHRvYzogdHJ1ZQogICAgICAgIHRvY19mbG9hdDogdHJ1ZQogICAgICAgIHRvY19jb2xsYXBzZWQ6IHRydWUKICAgICAgICB0b2NfZGVwdGg6IDIKICAgICAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgICAgICBzZWxmX2NvbnRhaW5lZDogdHJ1ZQotLS0KCgoKCiMgVGhlIGFnZSBvZiBiaWcgZGF0YQoqKioKCldlIGFyZSBnZXR0aW5nIG1vcmUgYW5kIG1vcmUgZGF0YSwgYW5kIHRoaXMgbWFrZXMgaXQgZWFzaWVyIHRvIG1ha2UgZmFsc2UgZGlzb3Zlcmllcy4gV2UgY29uc2lkZXIgdGhyZWUgbWFpbiB3YXlzIGZvciB0aGVzZSB0byBhcmlzZS4KCgojIyBSYW5kb21seSAoU3B1cmlvdXMgQ29ycmVsYXRpb24pCgpXZSBiZWdpbiB3aXRoIGEgbW90aXZhdGluZyBlbXBpcmljYWwgZXhhbXBsZSBvZiAiVVMgR292J3QgU3BlbmRpbmcgb24gU2NpZW5jZSIuIEdldCBhbmQgaW5zcGVjdCBzb21lIGRhdGEgZnJvbSBodHRwczovL3R5bGVydmlnZW4uY29tL3NwdXJpb3VzLWNvcnJlbGF0aW9ucwoKYGBge3J9CiMjIFlvdXIgZGF0YSBpcyBub3QgbWFkZSB1cCBpbiB0aGUgY29tcHV0ZXIgKGhvcGVmdWxseSEpCiMjIHdpbGwgbm9ybWFsbHkgYmUgYW4gYWRkcmVzcyBvbiB5b3VyIFBDCnZpZ2VuX2NzdiA8LSByZWFkLmNzdiggcGFzdGUwKAonaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL3RoZS1tYWQtc3RhdHRlci8nLAond2h5c29zcHVyaW91cy9tYXN0ZXIvZGF0YS1yYXcvdHlsZXJ2aWdlbi5jc3YnKSApIApjbGFzcyh2aWdlbl9jc3YpCm5hbWVzKHZpZ2VuX2NzdikKdmlnZW5fY3N2WzE6NSwxOjVdCgojIyBzaW1pbGFyIGBhcHBseScgZnVuY3Rpb25zCmxhcHBseSh2aWdlbl9jc3ZbLDE6NV0sIGNsYXNzKSAjIyBsaWtlIGFwcGx5LCBidXQgZm9yIGxpc3RzCnNhcHBseSh2aWdlbl9jc3ZbLDE6NV0sIGNsYXNzKSAjIyBsYXBwbHksIGZvcm1hdHRlZCB0byBhIHZlY3RvcgpgYGAKClRoZSBVUyBnb3Zlcm5tZW50IHNwZW5kaW5nIG9uIHNjaWVuY2UgaXMgcnVpbmluZyBjaW5lbWEKKHA8LjAwMSkhPwoKYGBge3J9CiMjIERyb3AgRGF0YSBiZWZvcmUgMTk5OQp2aWdlbl9jc3YgPC0gdmlnZW5fY3N2W3ZpZ2VuX2NzdiR5ZWFyID49IDE5OTksXSAKCiMjIFJ1biBPTFMgUmVncmVzc2lvbiAkCnJlZzEgPC0gIGxtKGNhZ2VfZmlsbXMgfiAtMSArIHNjaWVuY2Vfc3BlbmRpbmcsIGRhdGE9dmlnZW5fY3N2KQpzdW1tYXJ5KHJlZzEpCmBgYAoKCkl0J3Mgbm90IGFsbCBiYWQsIHBlb3BsZSBpbiBtYWluZSBzdGF5IG1hcnJpZWQgbG9uZ2VyPwoKYGBge3J9CnBsb3QubmV3KCkKcGxvdC53aW5kb3coeGxpbT1jKDE5OTksIDIwMDkpLCB5bGltPWMoNyw5KSkKbGluZXMobG9nKG1haW5lX2Rpdm9yY2VfcmF0ZSoxMDAwKX55ZWFyLCBkYXRhPXZpZ2VuX2NzdikKbGluZXMobG9nKHNjaWVuY2Vfc3BlbmRpbmcvMTApfnllYXIsIGRhdGE9dmlnZW5fY3N2LCBsdHk9MikKYXhpcygxKQpheGlzKDIpCmxlZ2VuZCgndG9wcmlnaHQnLCBsdHk9YygxLDIpLCBsZWdlbmQ9YygKICAgICdsb2cobWFpbmVfZGl2b3JjZV9yYXRlKjEwMDApJywKICAgICdsb2coc2NpZW5jZV9zcGVuZGluZy8xMCknKSkKYGBgCgoKYGBge3J9CnBhcihtZnJvdz1jKDEsMiksIG1hcj1jKDIsMiwyLDEpKQpwbG90Lm5ldygpCnBsb3Qud2luZG93KHhsaW09YygxOTk5LCAyMDA5KSwgeWxpbT1jKDUsOSkqMTAwMCkKbGluZXMoc2NpZW5jZV9zcGVuZGluZy8zfnllYXIsIGRhdGE9dmlnZW5fY3N2LCBsdHk9MSwgY29sPTIsIHBjaD0xNikKdGV4dCgyMDAzLCA4MjAwLCAnVVMgc3BlbmRpbmcgb24gc2NpZW5jZSwgc3BhY2UsIHRlY2hub2xvZ3kgKFVTRC8zKScsIGNvbD0yLCBjZXg9LjYsIHNydD0zMCkKbGluZXMoaGFuZ2luZ19zdWljaWRlc355ZWFyLCBkYXRhPXZpZ2VuX2NzdiwgbHR5PTEsIGNvbD00LCBwY2g9MTYpCnRleHQoMjAwNCwgNjUwMCwgJ1VTIFN1aWNpZGVzIGJ5IGhhbmdpbmcsIHN0cmFuZ3VsYXRpb24sIHN1ZmZvY2F0aW9uIChEZWF0aHMpJywgY29sPTQsIGNleD0uNiwgc3J0PTMwKQpheGlzKDEpCmF4aXMoMikKCgpwbG90Lm5ldygpCnBsb3Qud2luZG93KHhsaW09YygyMDAyLCAyMDA5KSwgeWxpbT1jKDAsNSkpCmxpbmVzKGNhZ2VfZmlsbXN+eWVhciwgZGF0YT12aWdlbl9jc3ZbdmlnZW5fY3N2JHllYXI+PTIwMDIsXSwgbHR5PTEsIGNvbD0yLCBwY2g9MTYpCnRleHQoMjAwNiwgMC41LCAnTnVtYmVyIG9mIGZpbG1zIHdpdGggTmljb2xhcyBDYWdlIChGaWxtcyknLCBjb2w9MiwgY2V4PS42LCBzcnQ9MCkKbGluZXMocG9vbF9mYWxsX2Ryb3duaW5ncy8yNX55ZWFyLCBkYXRhPXZpZ2VuX2Nzdlt2aWdlbl9jc3YkeWVhcj49MjAwMixdLCBsdHk9MSwgY29sPTQsIHBjaD0xNikKdGV4dCgyMDA2LCA0LjUsICdOdW1iZXIgb2YgZHJvd25pbmdzIGJ5IGZhbGxpbmcgaW50byBwb29sIChVUyBEZWF0aHMvMjUpJywgY29sPTQsIGNleD0uNiwgc3J0PTApCmF4aXMoMSkKYXhpcygyKQpgYGAKCgpgYGB7ciwgcmVzdWx0cz0nYXNpcycsIGV2YWw9Rn0KIyMgSW5jbHVkZSBhbiBpbnRlcmNlcHQgdG8gcmVncmVzc2lvbiAxCiNyZWcyIDwtICBsbShjYWdlX2ZpbG1zIH4gc2NpZW5jZV9zcGVuZGluZywgZGF0YT12aWdlbl9jc3YpCiNzdXBwcmVzc01lc3NhZ2VzKGxpYnJhcnkoc3RhcmdhemVyKSkKI3N0YXJnYXplcihyZWcxLCByZWcyLCB0eXBlPSdodG1sJykKYGBgCgoKV2Ugbm93IHJ1biBJViByZWdyZXNzaW9ucyBmb3IgZGlmZmVyZW50IHZhcmlhYmxlIGNvbWJpbmF0aW9ucyBpbiB0aGUgZGF0YXNldApgYGB7cn0KbGlicmFyeShmaXhlc3QpCmtuYW1lcyA8LSBuYW1lcyh2aWdlbl9jc3YpWzI6MTFdICMjIEZpcnN0IDEwIFZhcmlhYmxlcwoja25hbWVzIDwtIG5hbWVzKHZpZ2VuX2NzdilbLTFdICMjIEFsbCBWYXJpYWJsZXMKcCA8LSAxCmlpIDwtIDEKaXZyZWdfbGlzdCA8LSB2ZWN0b3IoImxpc3QiLCBmYWN0b3JpYWwobGVuZ3RoKGtuYW1lcykpL2ZhY3RvcmlhbChsZW5ndGgoa25hbWVzKS0zKSkKCiMjIENob29zZSAzIHZhcmlhYmxlCmZvciggazEgaW4ga25hbWVzKXsKZm9yKCBrMiBpbiBzZXRkaWZmKGtuYW1lcyxrMSkpewpmb3IoIGszIGluIHNldGRpZmYoa25hbWVzLGMoazEsazIpKSApeyAgIAogICAgWDEgPC0gdmlnZW5fY3N2WyxrMV0KICAgIFgyIDwtIHZpZ2VuX2NzdlssazJdCiAgICBYMyA8LSB2aWdlbl9jc3ZbLGszXQogICAgIyMgTWVyZ2UgYW5kIGBBbmFseXplJyAgICAgICAgCiAgICBkYXRfaSA8LSBuYS5vbWl0KGRhdGEuZnJhbWUoWDEsWDIsWDMpKQogICAgaXZyZWdfaSA8LSBmZW9scyhYMX4xfFgyflgzLCBkYXRhPWRhdF9pKQogICAgaXZyZWdfbGlzdFtbaWldXSA8LSBsaXN0KGl2cmVnX2ksIGMoazEsazIsazMpKQogICAgaWkgPC0gaWkrMQp9fX0KcHZhbHMgPC0gc2FwcGx5KGl2cmVnX2xpc3QsIGZ1bmN0aW9uKGl2cmVnX2kpe2l2cmVnX2lbWzFdXSRjb2VmdGFibGVbMiw0XX0pCgpwbG90KGVjZGYocHZhbHMpLCB4bGFiPSdwLXZhbHVlJywgeWxhYj0nQ0RGJywgbWFpbj0nRnJlcXVlbmN5IElWIGlzIFN0YXRpc3RpY2FsbHkgU2lnbmlmaWNhbnQnKQphYmxpbmUodj1jKC4wMSwuMDUpLCBjb2w9YygyLDQpKQoKIyMgTW9zdCBTaWduaWZpY2FudCBTcHVyaW91cyBDb21iaW5hdGlvbnMKcHZhcnMgPC0gc2FwcGx5KGl2cmVnX2xpc3QsIGZ1bmN0aW9uKGl2cmVnX2kpe2l2cmVnX2lbWzJdXX0pCnBkYXQgPC0gZGF0YS5mcmFtZSh0KHB2YXJzKSwgcHZhbHMpCnBkYXQgPC0gcGRhdFtvcmRlcihwZGF0JHB2YWxzKSxdCmhlYWQocGRhdCkKYGBgCgpGb3IgbW9yZSBpbnR1aXRpb24gb24gc3B1cmlvdXMgY29ycmVsYXRpb25zLCB0cnkgaHR0cDovL3NoaW55LmNhbHBvbHkuc2gvQ29ycl9SZWdfR2FtZS8KCgojIyBQcm9ncmFtbWluZyBFcnJvcnMKCkEgaHVnZSBhbW91bnQgb2YgZGF0YSBub3JtYWxseSBtZWFucyBhIGh1Z2UgYW1vdW50IG9mIGRhdGEgY2xlYW5pbmcvbWVyZ2luZy9hZ2dyZWdhdGluZy4gU29tZSBzcHVyaW91cyByZXN1bHRzIGFyZSBkcml2ZW4gYnkgZXJyb3JzIGluIHRoaXMgcHJvY2Vzcywgc28gYmUgY2FyZWZ1bC4KCmBgYHtyfQojIyBGdW5jdGlvbiB0byBDcmVhdGUgU2FtcGxlIERhdGFzZXRzCm1ha2Vfbm9pc3lfZGF0YSA8LSBmdW5jdGlvbihuLCBiPTApewogICAgeCA8LSBzZXEoMSwxMCwgbGVuZ3RoLm91dD1uKQogICAgZSA8LSBybm9ybShsZW5ndGgoeCksIG1lYW49MCwgc2Q9MTApCiAgICB5IDwtIGIqeCArIGUKICAgIHh5X21hdCA8LSBkYXRhLmZyYW1lKElEPXNlcSh4KSwgeD14LCB5PXkpCiAgICByZXR1cm4oeHlfbWF0KQp9CgojIyBUd28gc2FtcGxlIGRhdGFzZXRzCmRhdDEgPC0gbWFrZV9ub2lzeV9kYXRhKDYpCmRhdDIgPC0gbWFrZV9ub2lzeV9kYXRhKDYpCgojIyBNZXJnaW5nIGRhdGEgaW4gd2lkZSBmb3JtYXQKZGF0X21lcmdlZF93aWRlIDwtIG1lcmdlKGRhdDEsIGRhdDIsCiAgICBieT0nSUQnLCBzdWZmaXhlcz1jKCcuMScsJy4yJykpCgojIyBtZXJnaW5nIGRhdGEgaW4gbG9uZyBmb3JtYXQgYW5kIHJlc2hhcGluZyB0byB3aWRlCmRhdF9tZXJnZWRfbG9uZyA8LSByYmluZCggY2JpbmQoZGF0MSxERj0xKSxjYmluZChkYXQyLERGPTIpKQpsaWJyYXJ5KHJlc2hhcGUyKQpkYXRfbWVsdGVkIDwtIG1lbHQoZGF0X21lcmdlZF9sb25nLCBpZC52YXJzPWMoJ0lEJywgJ0RGJykpCmRhdF9tZXJnZWRfd2lkZTIgPC0gZGNhc3QoZGF0X21lbHRlZCwgSUR+REYrdmFyaWFibGUpCgojIyBDSEVDSyB0aGV5IGFyZSB0aGUgc2FtZS4KZGF0X21lcmdlZF93aWRlID09IGRhdF9tZXJnZWRfd2lkZTIKYGBgCgpOZXZlcnRoZWxlc3MsIGRhdGEgdHJhbnNmb3JtYXRpb24gaXMgb2Z0ZW4gbmVjZXNzYXJ5IGJlZm9yZSByZWdyZXNzaW9uIGFuYWx5c2lzLiBGb3IgZG93bmxvYWRpbmcgdGlwcywgc2VlIGh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9yc3R1ZGlvL2NoZWF0c2hlZXRzL21haW4vZGF0YS1pbXBvcnQucGRmCjwhLS1cdXJse2h0dHBzOi8vZ2l0aHViLmNvbS9yc3R1ZGlvL2NoZWF0c2hlZXRzL3Jhdy9tYXN0ZXIvZGF0YS10cmFuc2Zvcm1hdGlvbi5wZGZ9LS0+CgojIyBQLUhhY2tpbmcKCkFub3RoZXIgY2xhc3Mgb2YgZXJyb3JzIHBlcnRhaW5zIHRvIFAtaGFja2luZyAoYW5kIGl0J3MgdmFyaW91cyBzeW5vbnltcykgCgpgYGB7cn0Kc2V0LnNlZWQoMTIzKQpuIDwtIDUwClgxIDwtIHJ1bmlmKG4pCgojIyBSZWdyZXNzaW9uIE1hY2hpbmU6CiMjIHJlcGVhdGViZGx5IGZpbmRzIGNvdmFyaWF0ZSwgcnVucyByZWdyZXNzaW9uCiMjIHN0b3BzIHdoZW4gc3RhdGlzdGljYWxseSBzaWduaWZpY2FudCBhdCAuMVwlIAoKcCA8LSAxCmkgPC0gMAp3aGlsZShwID49IC4wMDEpeyAKICAgICMjIEdldCBSYW5kb20gQ292YXJpYXRlCiAgICBYMiA8LSAgcnVuaWYobikKICAgICMjIE1lcmdlIGFuZCBgQW5hbHl6ZScKICAgIGRhdF9pIDwtIGRhdGEuZnJhbWUoWDEsWDIpCiAgICByZWdfaSA8LSBsbShYMX5YMiwgZGF0YT1kYXRfaSkKICAgICMjIHVwZGF0ZSByZXN1bHRzIGluIGdsb2JhbCBlbnZpcm9ubWVudAogICAgcCA8LSBzdW1tYXJ5KHJlZ19pKSRjb2VmZmljaWVudHNbMiw0XQogICAgaSA8LSBpKzEKfQoKcGxvdChYMX5YMiwgZGF0YT1kYXRfaSwKICAgIHBjaD0xNiwgY29sPWdyZXkoLjUsLjUpLAogICAgbWFpbj1wYXN0ZTAoJ1JhbmRvbSBEYXRhc2V0ICcsIGksIjogcD0iLCByb3VuZChwLDQpKSkKYWJsaW5lKHJlZ19pKQojc3VtbWFyeShyZWdfaSkKYGBgCgoKYGBge3J9CiMjIFAtaGFja2luZyAyU0xTCnAgPC0gMQppaSA8LSAwCnNldC5zZWVkKDEyMykKd2hpbGUocCA+PSAuMDUpewogICAgIyMgR2V0IFJhbmRvbSBDb3ZhcmlhdGVzCiAgICBYMiA8LSAgcnVuaWYobikgICAgCiAgICBYMyA8LSAgcnVuaWYobikKICAgICMjIENyZWF0ZSBUcmVhdG1lbnQgVmFyaWFibGUgYmFzZWQgb24gQ3V0b2ZmCiAgICBjdXRvZmZzIDwtIHNlcSgwLDEsbGVuZ3RoLm91dD0xMSlbLWMoMSwxMSldCiAgICBmb3IodGF1IGluIGN1dG9mZnMpewogICAgICAgIFQzIDwtIDEqKFgzID4gdGF1KQogICAgICAgICMjIE1lcmdlIGFuZCBgQW5hbHl6ZScKICAgICAgICBkYXRfaSA8LSBkYXRhLmZyYW1lKFgxLFgyLFQzKQogICAgICAgIGl2cmVnX2kgPC0gZmVvbHMoWDF+MXxYMn5UMywgZGF0YT1kYXRfaSkKICAgICAgICAjIyBVcGRhdGUgcmVzdWx0cyBpbiBnbG9iYWwgZW52aXJvbm1lbnQKICAgICAgICBwdGFiIDwtIHN1bW1hcnkoaXZyZWdfaSkkY29lZnRhYmxlCiAgICAgICAgaWYoIG5yb3cocHRhYik9PTIpewogICAgICAgICAgICBwIDwtIHB0YWJbMiw0XQogICAgICAgICAgICBpaSA8LSBpaSsxCiAgICAgICAgfQogICAgfQp9CnN1bW1hcnkoaXZyZWdfaSkKYGBgCgojIFJhbmRvbSBDYXVzYWwgSW1wYWN0cwoKV2Ugc2ltcGx5IGFwcGx5IHRoZSB0aHJlZSBtYWpvciBjcmVkaWJsZSBtZXRob2RzIChJViwgUkRELCBESUQpIHRvIHJhbmRvbSB3YWxrcy4gV2UgZmluZCBhIHJlc3VsdCB0aGF0IGZpdHMgbW9sZCBhbmQgYWRkIHZhcmlvdXMgZXh0ZW5zaW9ucyB0aGF0IG1ha2UgaXQgYXBwZWFyIHJvYnVzdC4gVGhlIGFuYWx5c2lzIHNvdW5kcyBzY2llbnRpZmljLCBhbmQgeW91IGNvdWxkIHByb2JhYmx5IGJlIGNvbnZpbmNlZCBpZiBpdCB3ZXJlIG5vdCBqdXN0IHJhbmRvbSBub2lzZS4KCgpgYGB7ciwgbWVzc2FnZT1GLCB3YXJuaW5nPUZ9Cm4gPC0gMTAwMApuX2luZGV4IDwtIHNlcShuKQoKc2V0LnNlZWQoMSkKcmFuZG9tX3dhbGsxIDwtIGN1bXN1bShydW5pZihuLC0xLDEpKQoKc2V0LnNlZWQoMikKcmFuZG9tX3dhbGsyIDwtIGN1bXN1bShydW5pZihuLC0xLDEpKQoKcGFyKG1mcm93PWMoMSwyKSkKcGxvdChyYW5kb21fd2FsazEsIHBjaD0xNiwgY29sPXJnYigxLDAsMCwuMjUpLAogICAgeGxhYj0nVGltZScsIHlsYWI9J1JhbmRvbSBWYWx1ZScpCnBsb3QocmFuZG9tX3dhbGsyLCBwY2g9MTYsIGNvbD1yZ2IoMCwwLDEsLjI1KSwKICAgIHhsYWI9J1RpbWUnLCB5bGFiPSdSYW5kb20gVmFsdWUnKQpgYGAKCgojIyBJVgoKYGBge3J9CiMjIFNlYXJjaGluZyBmb3IgYSB2YWxpZCBpbnN0cnVtZW50CnAgPC0gMQppaSA8LSAwCnNldC5zZWVkKDMpCndoaWxlKHAgPj0gLjAwMSl7CiAgICAjIyBHZXQgUmFuZG9tIENvdmFyaWF0ZXMKICAgIHJhbmRvbV93YWxrMyA8LSBjdW1zdW0ocnVuaWYobiwtMSwxKSkKICAgICMjIE1lcmdlIGFuZCBgQW5hbHl6ZScKICAgIGRhdF9pIDwtIGRhdGEuZnJhbWUoCiAgICAgICAgWDE9cmFuZG9tX3dhbGsxLAogICAgICAgIFgyPXJhbmRvbV93YWxrMiwKICAgICAgICBYMz1yYW5kb21fd2FsazMpCiAgICBpdnJlZ19pIDwtIGZlb2xzKFgxfjF8WDJ+WDMsIGRhdGE9ZGF0X2kpCiAgICAjIyBVcGRhdGUgcmVzdWx0cyBpbiBnbG9iYWwgZW52aXJvbm1lbnQKICAgIHB0YWIgPC0gc3VtbWFyeShpdnJlZ19pKSRjb2VmdGFibGUKICAgIGlmKCBucm93KHB0YWIpPT0yKXsKICAgICAgICBwIDwtIHB0YWJbMiw0XQogICAgICAgIGlpIDwtIGlpKzEKICAgIH0KfQpzdW1tYXJ5KGl2cmVnX2kpCmBgYAoKCiMjIFJERAoKYGBge3IsIG1lc3NhZ2U9Riwgd2FybmluZz1GfQojIyBMZXQgdGhlIGRhdGEgdGFrZSBzaGFwZQojIyAoYXJvdW5kIHRoZSBsYXJnZSBkaWZmZXJlbmNlcyBiZWZvcmUgYW5kIGFmdGVyKQpuMSA8LSAyOTAKd2luZDEgPC0gYyhuMS0zMDAsbjErMzAwKQpkYXQxIDwtIGRhdGEuZnJhbWUodD1uX2luZGV4LCB5PXJhbmRvbV93YWxrMSwgZD0xKihuX2luZGV4ID4gbjEpKQpkYXQxX3N1YiA8LSBkYXQxWyBuX2luZGV4PndpbmQxWzFdICYgbl9pbmRleCA8IHdpbmQxWzJdLF0KCiMjIFRoZW4gZmluZCB5b3VyIGJpZyBicmVhawpyZWcwIDwtIGxtKHl+dCwgZGF0YT1kYXQxX3N1YltkYXQxX3N1YiRkPT0wLF0pCnJlZzEgPC0gbG0oeX50LCBkYXRhPWRhdDFfc3ViW2RhdDFfc3ViJGQ9PTEsXSkKCiMjIFRoZSBldmlkZW5jZSBzaG91bGQgc2hvdyBvcGVubHkgKGl0J3MganVzdCBzY2llbmNlKQpwbG90KHJhbmRvbV93YWxrMSwgcGNoPTE2LCBjb2w9cmdiKDAsMCwxLC4yNSksCiAgICB4bGltPXdpbmQxLCB4bGFiPSdUaW1lJywgeWxhYj0nUmFuZG9tIFZhbHVlJykKYWJsaW5lKHY9bjEsIGx0eT0yKQpsaW5lcyhyZWcwJG1vZGVsJHQsIHJlZzAkZml0dGVkLnZhbHVlcywgY29sPTEpCmxpbmVzKHJlZzEkbW9kZWwkdCwgcmVnMSRmaXR0ZWQudmFsdWVzLCBjb2w9MSkKYGBgCgpgYGB7ciwgbWVzc2FnZT1GLCB3YXJuaW5nPUYsIHJlc3VsdHM9J2FzaXMnfQojIyBEcmVzcyB3aXRoIHNvbWUgc3RhdGlzdGljcyBmb3IgYWRkZWQgY3JlZGliaWxpdHkKcmRkX3N1YiA8LSBsbSh5fmQrdCtkKnQsIGRhdGE9ZGF0MV9zdWIpCnJkZF9mdWxsIDwtIGxtKHl+ZCt0K2QqdCwgZGF0YT1kYXQxKQpzdGFyZ2F6ZXI6OnN0YXJnYXplcihyZGRfc3ViLCByZGRfZnVsbCwgCiAgICB0eXBlPSdodG1sJywKICAgIHRpdGxlPSdSZWNpcGUgUkREJywKICAgIGhlYWRlcj1GLAogICAgb21pdD1jKCdDb25zdGFudCcpLAogICAgbm90ZXM9YygnRmlyc3QgY29sdW1uIHVzZXMgYSBkYXRhc2V0IGFyb3VuZCB0aGUgZGlzY29udGludWl0eS4nLAogICAgJ1NtYWxsZXIgd2luZG93cyBhcmUgbW9yZSBjYXVzYWwsIGFuZCB3aGVyZSB0aGUgZWZmZWN0IGlzIGJpZ2dlci4nKSkKYGBgCgoKIyMgRElECgpgYGB7ciwgbWVzc2FnZT1GLCB3YXJuaW5nPUZ9CiMjIEZpbmQgYSByZXZlcnNhbCBvZiBmb3J0dW5lCiMjIChBIGdvb2Qgc3RvcnkgYWx3YXlzIGdvZXMgd2VsbCB3aXRoIGEgbmljZSBwcmUtdHJlbmQpCm4yIDwtIDMxOAp3aW5kMiA8LSBjKG4yLTIwLG4yKzIwKQpwbG90KHJhbmRvbV93YWxrMiwgcGNoPTE2LCBjb2w9cmdiKDAsMCwxLC41KSwKICAgIHhsaW09d2luZDIsIHlsaW09YygtMTUsMTUpLCB4bGFiPSdUaW1lJywgeWxhYj0nUmFuZG9tIFZhbHVlJykKcG9pbnRzKHJhbmRvbV93YWxrMSwgcGNoPTE2LCBjb2w9cmdiKDEsMCwwLC41KSkKYWJsaW5lKHY9bjIsIGx0eT0yKQpgYGAKCmBgYHtyLCBtZXNzYWdlPUYsIHdhcm5pbmc9RiwgcmVzdWx0cz0nYXNpcyd9CiMjIEtuZWFkIG91dCBhbnkgZWZmZWN0cyB0aGF0IGFyZSBub24tY2F1c2FsIAojIyAob3IgZXZlbiBqdXN0IGNvcnJlbGF0aW9uKQpkYXQyQSA8LSBkYXRhLmZyYW1lKHQ9bl9pbmRleCwgeT1yYW5kb21fd2FsazEsIGQ9MSoobl9pbmRleCA+IG4yKSwgUldpZD0xKQpkYXQyQiA8LSBkYXRhLmZyYW1lKHQ9bl9pbmRleCwgeT1yYW5kb21fd2FsazIsIGQ9MCwgUldpZD0yKQpkYXQyICA8LSByYmluZChkYXQyQSwgZGF0MkIpCmRhdDIkUldpZCA8LSBhcy5mYWN0b3IoZGF0MiRSV2lkKQpkYXQyJHRpZCA8LSBhcy5mYWN0b3IoZGF0MiR0KQpkYXQyX3N1YiA8LSBkYXQyWyBkYXQyJHQ+d2luZDJbMV0gJiBkYXQyJHQgPCB3aW5kMlsyXSxdCgojIyBSZXBvcnQgdGhlIHN0YXJzIGZvciBhbGwgdG8gZW5qb3kKIyMgKGFuZCByZW1pbmQgdGhhdCBzdGFibGUgY29lZmZpY2llbnRzIGFyZSB0aGUgZ29vZCBvbmVzKQpkaWRfZmUxIDwtIGxtKHl+ZCt0aWQsIGRhdGE9ZGF0Ml9zdWIpCmRpZF9mZTIgPC0gbG0oeX5kK1JXaWQsIGRhdGE9ZGF0Ml9zdWIpCmRpZF9mZTMgPC0gbG0oeX5kKlJXaWQrdGlkLCBkYXRhPWRhdDJfc3ViKQpzdGFyZ2F6ZXI6OnN0YXJnYXplcihkaWRfZmUxLCBkaWRfZmUyLCBkaWRfZmUzLAogICAgdHlwZT0naHRtbCcsCiAgICB0aXRsZT0nUmVjaXBlIERJRCcsCiAgICBoZWFkZXI9RiwKICAgIG9taXQ9YygndGlkJywnUldpZCcsICdDb25zdGFudCcpLAogICAgbm90ZXM9YygKICAgICAnRml4ZWQgZWZmZWN0cyBmb3IgdGltZSBpbiBjb2x1bW4gMSwgZm9yIGlkIGluIGNvbHVtbiAyLCBhbmQgYm90aCBpbiBjb2x1bW4gMy4nLAogICAgICdGaXhlZCBlZmZlY3RzIGNvbnRyb2wgZm9yIG1vc3Qgb2YgeW91ciBjb25jZXJucy4nLAogICAgICdBbnl0aGluZyBlbHNlIGNyZWF0ZXMgYSBiaWFzIGluIHRoZSBvcHBvc2l0ZSBkaXJlY3Rpb24uJykpCmBgYAoKCgoKCjwhLS0gIyMgQ09NUElMRSBGUk9NIENMSQogICAgUnNjcmlwdCAtZSAicm1hcmtkb3duOjpyZW5kZXIoJ0RhdGFTY2llbnRpc20uUm1kJykiCiAgICBjcCBEYXRhU2NpZW50aXNtLmh0bWwgLi4vZG9jcy9EYXRhU2NpZW50aXNtLmh0bWwKLS0+CgoK