####################################################################
#
#  Worked through high-mode detection OSSE
# ------------------------------------------------------------
#
#     Supplementary code for Cusworth et al., (2018) ACPD
#
#     Author:  Daniel Cusworth
#     Contact: dancusworth @ gmail
#     Date:    9/27/2018
#
#     INSTRUCTIONS:
#	- Must first generate footprints using the WRF-STILT
#	model. See https://uataq.github.io/stilt/docs/
#	- Modify fields in "user-defined variables"
#       - Run from command: line Rscript example_acp.R
#       - Results are output in Rdata files:
#		* sep_pred_df.rds - predictions POD/FAR etc 
#		* sep_coef_df.rds - xhat for each realization 
# 
#
#
#     Inverse analysis utilized in this script is based on
#     the work of Hastie, Tibshirani, and Qian:
#        Ref: web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
#
#     Classification metrics are based on the definitions 
#     of probability of detection (POD) and false alarm ratio (FAR)
#     as discussed in Brassuer and Jacob (2017)
#
#     This software is free and comes with no warranty. Please 
#     feel free to contact me with any questions.
#
#
######################################################################


# Load R packages needed to run OSSE

library(ncdf4)
library(Matrix)
library(arrayhelpers)
library(glmnet)
library(dplyr)
library(plotrix)
library(doMC)
set.seed(200)
library(geosphere)
library(parallelMap)


#-------                        -------#
#------- USER DEFINED VARIABLES -------#
#-------                        -------#

nemitters = %NEMIT%                             # Number of emitters in field
one_footprint_dir = %FOOT_FILE%                 # Location of one example footprint file 
xlims = c(%LEFT_BOUND%, %RIGHT_BOUND%)          # Observing domain longitude boundaries 
xlim_buffer = c(%LEFT_BOUND%, %RIGHT_BOUND%)    # Emission domain longitude boundaries
ylims = c(%LOW_BOUND%, %UP_BOUND%)              # Observing domain latitude boundaries
ylim_buffer = c(%LOW_BOUND%, %UP_BOUND%)        # Emission domain latitude boundaries 
fat_prob = %FAT_PROB%                           # Probability (0-1) of high-mode emitter
site_prob = c(%SMALL%, %MED%, %LAR%)            # Probability that emitters are small, medium,
						#      or large production sites
well_dir = %WELL_DIR%			        # Location of CSV file containing emission
						# PDF information with following headers:
						#   cat: production size category, values must equal
						#	 10 (for facilities of 0-10 Mcf/d), 
						#	 100 (for facilities of 100-1000 Mcf/d, 
						#	 or 1000 (for facilities of 1000+ Mcf/d
						#   kgh: emissions in kg/h
						#   is_super: is emitter high-mode? "yes" or "no"
seed_len = %NUM_SEED%				# Number of realizations to do
sep_obs_configs = c(%NAME%)			# Character name for run, e.g. 'geocarb'		
noises = c(%NOISE%)				# Instrument noise in PPB	
xres = %XRES%					# X resolution of satellite pixel
yres = %YRES%					# Y resolution of satellite pixel
time_res = c(%1%, %2%, %...%)			# Vector of overpasses (in UTC) of satellite
terr = %TERR%					# Transport error in PPB
num_cores = %NUM_CORES%				# Number of cores
L = %L%						# Spatial correlation length scale
tau = %tau%					# Temporal correlation length scale
foot_fl = %FOOT%				# Directory where footprints are contained 


#-------                        -------#
#-------        FUNCTIONS       -------#
#-------                        -------#

make_emitter_fixed = function(nemitters, fat_prob, iseed1=200, iseed2=200){

        #Given domain, set number of emitters
        #Given set of emitters, using given probability
        #define a number of high mode-emitters
        #Randomly place them inside the domain
        set.seed(iseed2)

        #Make a buffer region - so that candidate points have to be contained
        pot_lon = foot_lon[near_highx][foot_lon[near_highx] <= xlim_buffer[2]
                        & foot_lon[near_highx] >= xlim_buffer[1]]
        pot_lat = foot_lat[near_highy][foot_lat[near_highy] <= ylim_buffer[2]
                        & foot_lat[near_highy] >= ylim_buffer[1]]

	#Get number of high-mode of emitters
        nsuper = trunc(nsub_sel * fat_prob)
        potential_points = data.frame(lon = rep(pot_lon, each=length(pot_lat)),
                                lat = rep(pot_lat, times=length(pot_lon)))

	#Distribute these emitters across field
        emitter_points = sample(1:dim(potential_points)[1], nemitters)
        emitter_points = emitter_points[sub_inds]
        set.seed(iseed1)
        super_points = emitter_points[sample(1:length(emitter_points), nsuper)]
        set.seed(iseed2)

        emitter_locs = potential_points[setdiff(emitter_points, super_points),]
        super_locs = potential_points[super_points,]

        all_emitters = rbind(emitter_locs, super_locs)

        return(list(all_emitters, emitter_locs, super_locs))
}

#Function to make spatial location of wells
make_well_locations = function(nemitter = 100){
	
	#### Create location of wells and set production size type ####
	iemit = make_emitter_fixed(nemitter, fat_prob=fat_prob, iseed1=200, iseed2=200)

	#Assign these emitters a facility size depending on probabilities
	ind_samp = runif(dim(iemit[[1]])[1])
	cum_samp = cumsum(site_prob)
	site_type = NULL
	for(i in 1:length(ind_samp)){

		if(ind_samp[i] < cum_samp[1]){
			site_type[i] = 1
		}else if(ind_samp[i] < cum_samp[2] & ind_samp[i] > cum_samp[1]){
			site_type[i] = 2
		}else{
			site_type[i] = 3
		}
	}

	return(list(iemit, site_type))
}

#Make truth inventory given well locations, site types, and a random seed
make_truth_inventory = function(iemit, site_type, iseed=200){


	#Pick number of emitters and generate xtruth locations/emissions
	xA = make_prior_locs(iemit[[1]], avg_prod=1, sd_prod=.4, avg_fat=500, sd_fat=50)
	nsuper = trunc(dim(iemit[[1]])[1] * fat_prob)
	set.seed(iseed)
	super_points = sample(1:dim(iemit[[1]])[1], nsuper)
	reg_points = which(!(1:dim(iemit[[1]])[1] %in% super_points)) 	

	#Given site type - associate the correct emissions both super and regular
	x_truth = xA

	#Site Type 1 - 10-100
	x_truth[intersect(reg_points, which(site_type == 1)), "prior_emis"] =
		sample(wells[wells$is_super == "no" & wells$cat == 10, "kgh"],
			length(intersect(reg_points, which(site_type == 1))),replace=T)
	x_truth[intersect(super_points, which(site_type == 1)), "prior_emis"] =
		sample(wells[wells$is_super == "yes" & wells$cat == 10, "kgh"],
			length(intersect(super_points, which(site_type == 1))),replace=T)

	#Site Type 2 - 100-1000
	x_truth[intersect(reg_points, which(site_type == 2)), "prior_emis"] =
		sample(wells[wells$is_super == "no" & wells$cat == 100, "kgh"],
			length(intersect(reg_points, which(site_type == 2))),replace=T)
	x_truth[intersect(super_points, which(site_type == 2)), "prior_emis"] =
		sample(wells[wells$is_super == "yes" & wells$cat == 100, "kgh"],
			length(intersect(super_points, which(site_type == 2))),replace=T)

	#Site Type 3 - above 1000
	x_truth[intersect(reg_points, which(site_type == 3)), "prior_emis"] =
		sample(wells[wells$is_super == "no" & wells$cat == 1000, "kgh"],
			length(intersect(reg_points, which(site_type == 3))),replace=T)
	x_truth[intersect(super_points, which(site_type == 3)), "prior_emis"] =
		sample(wells[wells$is_super == "yes" & wells$cat == 1000, "kgh"],
			length(intersect(super_points, which(site_type == 3))),replace=T)

	#Look at emissions and remove any high mode emitters that don't meet threshold
	too_small = x_truth[super_points, "prior_emis"] <= med_thres
	reg_points = c(reg_points, super_points[too_small])
	super_points = super_points[!too_small]

	#Save out locations of super and regular emitters
	super_locs = x_truth[super_points,]
	emitter_locs = x_truth[reg_points,]

	return(list(x_truth, super_locs, emitter_locs, super_points, reg_points))
}


#Function to create pseudo-observations given H, R, and instrument noise
make_pseudo = function(jac_iter, L_chol, all_obs_places, mnoise, x_truth){
	####First create pseudo-observations using given Jacobian and truth prior
	full_jacob = jac_iter
	all_emitters = iemit[[1]]
	mag_noise = mnoise

	#remove NA rows
	good_rows = which(!is.na(full_jacob[,1]))

	#Select rows used based on good obs & observing system
	sel_row = good_rows

	#Select only columns and rows that work for particular modeling framework
	Hnear_near = full_jacob[sel_row,]
	update_obs_places = all_obs_places[sel_row,]

	#Make OBS
	x_truth2 = join(as.data.frame(all_emitters), as.data.frame(x_truth), by=c("lon","lat"), type="left")
	yMODEL_near = ((Hnear_near %*% x_truth2[,"prior_emis"])*1000)
	yOBS_pure = yMODEL_near

	#Add noise to OBS - different for each type of observer
	yOBS = rnorm(length(yOBS_pure), as.vector(yOBS_pure), as.vector(yOBS_pure) * mnoise)

	#Whiten Jacobian
	yWHITE = solve(L_chol, yOBS)

	return(yWHITE)

}

#Function to perform calculation H*R-1*H via Cholesky decomposition
whiten_jac = function(jac_iter, R, all_obs_places){

        full_jacob = jac_iter

        #remove NA rows
        good_rows = which(!is.na(full_jacob[,1]))

        #Select rows used based on good obs & observing system
        sel_row = good_rows

        #Select only columns and rows that work for particular modeling framework
        Hnear_near = full_jacob[sel_row,]
        update_obs_places = all_obs_places[sel_row,]


	#Do Cholesky decomposition to get lower triangular matrix
        R_chol = chol(R)
        L_chol = t(R_chol)

        #Using error covariance, "pre-whiten" the Jacobian matrix
        H_white = solve(L_chol, Hnear_near)

	return(list(H_white, L_chol, Hnear_near))

}


#Clustering functions
cluster_to_pred = function(min_coef, iemit, super_points, reg_points){

        set.seed(200)
        #Do K-means on coefficients to separate
        kclusters = kmeans(min_coef, centers=2, iter.max=1e8, nstart=10)
        max_clust = which(kclusters$centers == max(kclusters$centers))
        reg_emitters = which(kclusters$cluster != max_clust)
        top_emitters = which(kclusters$cluster == max_clust)

        pot_sup = iemit[[1]][top_emitters,]
        pot_reg = iemit[[1]][reg_emitters,]

        #Check whether detected/false pos/neg
        correctly_detected = iemit[[1]][intersect(top_emitters, super_points),]
        false_positive = iemit[[1]][intersect(top_emitters, reg_points),]
        false_negative = iemit[[1]][intersect(reg_emitters, super_points),]

        return(list(pot_sup, pot_reg, correctly_detected, false_positive, false_negative))
}

#Function to predict which emitters are high mode-emitters given 
#an optimal state vector estimate
make_pred = function(min_coef, iemit, super_points, reg_points){

	ethres = mean(min_coef, na.rm=T) + 2.5*sd(min_coef, na.rm=T)

	#ethres = quantile(min_coef, perc, na.rm=T)
	reg_emitters = which(min_coef <= ethres)
	top_emitters = which(min_coef >= ethres)

        pot_sup = iemit[[1]][top_emitters,]
        pot_reg = iemit[[1]][reg_emitters,]


	#Check whether detected/false pos/neg
        correctly_detected = iemit[[1]][intersect(top_emitters, super_points),]
        false_positive = iemit[[1]][intersect(top_emitters, reg_points),]
        false_negative = iemit[[1]][intersect(reg_emitters, super_points),]

        return(list(pot_sup, pot_reg, correctly_detected, false_positive, false_negative))

}

#Inversion LASSO/RIDGE function
lasso_func = function(Hmat, yFIT, alpha_val=0.95, iemit, super_points, reg_points){
        all_zero=T
        idx = 1

        while(all_zero){
                set.seed(200)
                fit = cv.glmnet(x = Hmat, y=yFIT, alpha=alpha_val, standardize=F, nfolds=5, parallel=T)#, lower.limit=0)
                min_coef = as.vector(coef(fit, s=fit$lambda.min)[2:(dim(Hmat)[2]+1)])

                num_zero = sum(min_coef == 0)

                all_zero = ifelse(num_zero == length(min_coef), T, F)
                idx = idx + 1

                all_zero = ifelse(idx > 10, F, all_zero)
                print(paste("retrying fit for", idx, "-th time"))
        }

        #Prediction based on coefficients
        lcoef = as.numeric(coef(fit, s=fit$lambda.min))
        lmu = lcoef[1]
        lbeta = lcoef[2:length(lcoef)]

        ymod = (predict.cv.glmnet(fit, newx=Hmat, s=fit$lambda.min)*1000)

        if(sum(lbeta) == 0){
                return(list(ymod, NULL, lbeta, idx, fit$lambda.min, fit))
        }else{
                ldetect = make_pred(lbeta, iemit, super_points, reg_points)
                pot_sup1  = ldetect[[1]]
                return(list(ymod, ldetect, lbeta, idx, fit$lambda.min, fit))
        }
}

#Do detection on these coefficients
sup_det = function(coefs, isim_name, perc=2.5){
        if(sum(coefs[,isim_name] == 0) == dim(coefs)[1]){
                return(data.frame(lon=rep(NA,3), lat=rep(NA,3)))
        }else{
		ethres = mean(coefs[,isim_name], na.rm=T) + perc*sd(coefs[,isim_name], na.rm=T)
		reg_emitters = which(coefs[,isim_name] <= ethres)
		top_emitters = which(coefs[,isim_name] >= ethres)
                pot_sup = coefs[top_emitters,]
                if(is.null(dim(pot_sup))){
                        pot_sup = data.frame(rbind(pot_sup, array(dim=c(2,length(pot_sup)))))
                }
                return(pot_sup)
        }
}


#Regrid coefficients at certain length scale
#I.e., update predictions given a certain tolerance level
threshold_detection = function(dat, iname, dx, real_sup=NULL, perc=2.5){

        #Do high mode-emitter prediction
        if(is.null(real_sup)){
                real_sup = data.frame(sup_det(dat,"truth")[,c("lon","lat")])
        }
        pred_sup = data.frame(sup_det(dat,iname, perc=perc)[,c("lon","lat")])
        pred_sup = pred_sup[complete.cases(pred_sup),]

        #Get correct predictions
        tp = merge(real_sup, pred_sup, by=c("lon","lat"))
        tp = tp[complete.cases(tp),]
	tp = unique(tp)
        nTP = dim(tp)[1]

        #Get False positives/negatives
        fn = suppressMessages(anti_join(real_sup, pred_sup))
        fp = suppressMessages(anti_join(pred_sup, real_sup))
	fn = unique(fn)
	fp = unique(fp)
        nFP = dim(fp)[1]
        nFN = dim(fn)[1]

	if(dim(real_sup)[1] == 0){

		return(c(nTP, nFP, nFN))

	}else{

		#See if distance between fp's and fn's pass threshold test
		okay_FN = 0
		if(nFN != 0 & nFP != 0){
			for(i in 1:nFN){
				all_dist = NULL
				for(j in 1:dim(pred_sup)[1]){
					idist = distm(c(fn[i,"lon"], fn[i,"lat"]),
						c(pred_sup[j,"lon"], pred_sup[j,"lat"]),
						fun = distHaversine) / 1000
					all_dist = c(all_dist, idist)
				}
				is_okay = sum(sum(all_dist < dx)>0)
				okay_FN = okay_FN + is_okay
			}
		}

		okay_FP = 0
		if(nFP != 0){
			for(i in 1:nFP){
				all_dist = NULL
				for(j in 1:dim(real_sup)[1]){
					idist = distm(c(fp[i,"lon"], fp[i,"lat"]),
						c(real_sup[j,"lon"], real_sup[j,"lat"]),
						fun = distHaversine) / 1000
					all_dist = c(all_dist, idist)
				}
				is_okay = sum(sum(all_dist < dx) > 0)
				okay_FP = okay_FP + is_okay
			}
		}

		#Update True Positives
		nFN = nFN - okay_FN
		nFP = nFP - okay_FP
		nTP = nTP + okay_FP + okay_FN

		return(c(nTP, nFP, nFN))

	}

}


#Given domain limits xlims/ylims, and a spatial resolution
#of a satellite, populate domain with receptor locations
#Funtion receives xlims/ylims and resolution as input
#and outputs lat/lon/time of each satellite observation
when_sat_obs = function(xlims, ylims, xres = 2.7, yres = 3, time_res = c(8,13,16)){

        #To get a starting point, assume you get an observation in 
        #the center of your domain - then populate outward

        center_lon = mean(xlims)
        center_lat = mean(ylims)

        center_gridx = foot_lon[which(abs(center_lon - foot_lon) == min(abs(center_lon - foot_lon)))]
        center_gridy = foot_lat[which(abs(center_lat - foot_lat) == min(abs(center_lat - foot_lat)))]


        #Convert km resolution to lat/lon
        xdists = seq(0,1,length=1000)
        npoints= cbind(center_gridx + xdists, rep(center_gridy, times=1000))
        cent_mat = cbind(rep(center_gridx, times=1000), rep(center_gridy, times=1000))

        xdist_mat = distm(x=cent_mat, y=npoints)/1000
        dlon = xdists[which(abs(xdist_mat - xres) == min(abs(xdist_mat - xres)), arr.ind=T)[1,2]]

        npoints = cbind(rep(center_gridx, times=1000), center_gridy + xdists)
        cent_mat = cbind(rep(center_gridx, times=1000), rep(center_gridy, times=1000))
        ydist_mat = distm(x=cent_mat, y=npoints)/1000
        dlat = xdists[which(abs(ydist_mat - yres) == min(abs(ydist_mat - yres)), arr.ind=T)[1,2]]

        xpoints = c(seq(center_gridx, xlims[1], by=-dlon), seq(center_gridx, xlims[2], by=dlon))
        ypoints = c(seq(center_gridy, ylims[1], by=-dlat), seq(center_gridy, ylims[2], by=dlat))


        #Find closest footprints to xypoints
        foot_locx = NULL
        for(i in 1:length(xpoints)){
                ix = which(abs(foot_lon-xpoints[i]) == min(abs(foot_lon-xpoints[i])))
                foot_locx = c(foot_locx, foot_lon[ix])
        }
        foot_locx = unique(foot_locx)

        foot_locy = NULL
        for(i in 1:length(ypoints)){
                iy = which(abs(foot_lat-ypoints[i]) == min(abs(foot_lat-ypoints[i])))
                foot_locy = c(foot_locy, foot_lat[iy])
        }
        foot_locy = unique(foot_locy)

        #Make data-frame of xy locs of observations and time of observation
        sat_obs = data.frame(lon = rep(foot_locx, each = length(foot_locy) * length(time_res)),
                             lat = rep(rep(foot_locy, times = length(foot_locx), each=length(time_res))),
                             hh = rep(time_res, times = length(foot_locx) * length(foot_locy)))


        return(sat_obs)

}

#Distance/time functions for construction of observational error covariance
#Written as a map-reduce to speed up computations

dist_func = function(jlon, jlat){
        idists = distm(c(ilon, ilat), c(jlon, jlat), fun = distHaversine)/1000
        return(idists)
}

corr_func = function(idist, idelta_t){
        out = 4 * (exp(-idist/L) *  exp(-idelta_t/tau))
        out = ifelse(out < 1e-3, 0, out)
        return(out)
}

inside_map = function(ilon, ilat){


        dist_func = function(jlon, jlat){
                idists = distm(c(ilon, ilat), c(jlon, jlat), fun = distHaversine)/1000
                return(idists)
        }

        dists = mapply(dist_func, lon_array[one_day], lat_array[one_day])

        return(dists)
}

time_map = function(itime){
        time_vec = as.vector(abs(as.POSIXlt(itime) - as.POSIXlt(time_array[one_day])) / 3600)
        return(time_vec)
}


#-------                        -------#
#-------     PRE-PROCESSING     -------#
#-------                        -------#


#Set the number of surface emitters you wish to simulate in experiment
nsub_sel = nemitters
sub_inds = sort(sample(1:100, nsub_sel))

#Load single OSSE footprint to get dimensions of new grid
one_foot = nc_open(one_footprint_dir)
foot_lon = ncvar_get(one_foot, "lon")
foot_lat = ncvar_get(one_foot, "lat")
jac_len = ncvar_get(one_foot, "foot")
nc_close(one_foot)

#Logical vector to choose subdomain
near_highx = foot_lon <= xlims[2] & foot_lon >= xlims[1]
near_highy = foot_lat <= ylims[2] & foot_lat >= ylims[1]

#Emission PDF information
wells = read.table(well_dir, sep=",", header=T)

#Make seed sequence
seed_array = trunc(seq(1,1000, length=seed_len))

#Observation files
oloc = when_sat_obs(xlims, ylims, xres=xres, yres=yres, time_res = time_res)
foot_files = list.files(foot_fl)
foot_dates = as.POSIXlt(substr(foot_files, 1, 10), format="%Y%m%d%H", tz="UTC")
foot_meta = data.frame(fl = paste(foot_fl,foot_files,sep="/"),
		yy = foot_dates$year+1900,
		mm = foot_dates$mon+1,
		dd = foot_dates$mday,
		hh = foot_dates$hour,
		lon = as.numeric(substr(foot_files, 12,18)),
		lat = as.numeric(substr(foot_files, 20,26)))	
foot_fls = merge(oloc, foot_meta, by=c("lon", "lat", "dd", "hh"))


##### USER SHOULD DOUBLE CHECK THAT THE MERGE BETWEEN oloc AND foot_fls
##### WORKED AND PRESERVED THE ORDERING OF OLOC!!!!!!!!!!!!!!


#-------                        -------#
#-------      RUN INVERSION     -------#
#-------                        -------#


#Make locations for every realization
iemits = make_well_locations(nemitter = nemitters)
iemit = iemits[[1]]
site_type = iemits[[2]]

#Create realizations of truth inventory
seed_array = trunc(seq(1,1000, length=seed_len))

#Build jacobian based on observing system
for(i in 1:dim(foot_fls)[1]){

	ifoot_dir = foot_fls[i, "fl"]
	isfine = tryCatch({
		nc = nc_open(ifoot_dir)
		foot_nf = ncvar_get(nc, "foot")
		H_mat = apply(foot_nf, 1:2, sum, na.rm=T)
		H = as.vector(H_mat)
		nc_close(nc)
		oval = T

	}, error = function(err){
		return(F)
	})


	if(i==1){
		full_jacob = array(dim=c(dim(oloc)[1], nemitters))
		xinds = NULL
		yinds = NULL
		for(j in 1:nemitters){
			ixind = which(abs(foot_lon - iemit[[1]][j,"lon"]) < 1e-4)
			iyind = which(abs(foot_lat - iemit[[1]][j,"lat"]) < 1e-4)
			xinds = c(xinds, ixind)
			yinds = c(yinds, iyind)
		}
	}

	if(oval){
		#Subselect H_mat given truth lat/lon    
		H_sel = NULL
		for(j in 1:nemitters){
			H_sel = cbind(H_sel, H_mat[xinds[j],yinds[j]])
		}
		full_jacob[i,] = H_sel
	}

}

#Make error covariance matrix
lon_array  = as.numeric(as.character(oloc[,"lon"]))
lat_array  = as.numeric(as.character(oloc[,"lat"]))
time_array = as.POSIXlt(paste("2013-10",as.character(oloc[,"dd"]),
                as.character(oloc[,"hh"]),sep="-"), format="%Y-%m-%d-%H")

options(
        parallelMap.default.mode        = "multicore",
        parallelMap.default.cpus        = num_cores,
        parallelMap.default.show.info   = FALSE
)

parallelStart()
dist_mat_one = do.call(rbind, parallelMap(inside_map, lon_array, lat_array))
gc()
time_mat_one = do.call(rbind, parallelMap(time_map, as.character(time_array)))
parallelStop()

#Only compute for values of matrix we know are okay
sel_comp = time_mat_one <= 24
R = Matrix(0, nrow=dim(dist_mat_one)[1], ncol=dim(dist_mat_one)[2], sparse=T)
R[sel_comp] = terr * (exp(-dist_mat_one[sel_comp]/L) *  exp(-time_mat_one[sel_comp]/tau))
R[R < 1e-3] = 0
diag(R) = tran_err + noises

#Whiten the Jacobians
ipseudo = whiten_jac(iH, iR, iobs)
assign(paste("H_",sep_obs_configs[i],sep=""), iH)
assign(paste("obs_",sep_obs_configs[i],sep=""), iobs)
assign(paste("R_",sep_obs_configs[i],sep=""), iR)
assign(paste("Hwhite_",sep_obs_configs[i],sep=""), ipseudo[[1]])
assign(paste("Lwhite_",sep_obs_configs[i],sep=""), ipseudo[[2]])

#Initialize output arrays
obs_configs = sep_obs_configs
coef_df = array(dim=c(nemitters, length(obs_configs)+3, length(seed_array), 2))
dimnames(coef_df)[[2]] = c("lon","lat","truth",obs_configs)
dimnames(coef_df)[[4]] = c("lasso", "ridge")
pred_df = array(dim=c(3,length(obs_configs), length(seed_array), 2, 3, 5))
dimnames(pred_df)[[2]] = obs_configs
dimnames(pred_df)[[4]] = c("lasso","ridge")
thres_array = c(1,3,5)
dimnames(pred_df)[[5]] = thres_array
perc_array = seq(1.65,2.5, length=5)
dimnames(pred_df)[[6]] = perc_array

#Loop over all seeds
for(i in 1:length(seed_array)){

	itruth = make_truth_inventory(iemit, site_type, iseed=seed_array[i])
	x_truth = itruth[[1]]
	super_locs = itruth[[2]]
	emitter_locs = itruth[[3]]
	super_points = itruth[[4]]
	reg_points = itruth[[5]]

	coef_df[,"lon",i,] = x_truth[,"lon"]
	coef_df[,"lat",i,] = x_truth[,"lat"]
	coef_df[,"truth",i,] = x_truth[,"prior_emis"]

		#Loop over each observing configuration
		for(j in 1:length(obs_configs)){
			jH_white = get(paste("Hwhite_", obs_configs[j], sep=""))
			jY_white = make_pseudo(
					get(paste("H_", obs_configs[j], sep="")),
					get(paste("Lwhite_", obs_configs[j], sep="")),
					get(paste("obs_", obs_configs[j], sep="")), 
					noises[j], 
					x_truth)

			#Perform inversion
			jlasso = lasso_func(jH_white, jY_white/1000, alpha=.95, iemit, super_points, reg_points)
			jridge = lasso_func(jH_white, jY_white/1000, alpha=.05, iemit, super_points, reg_points)


			#Save coefficients
			coef_df[,obs_configs[j],i,"lasso"] = jlasso[[3]]
			coef_df[,obs_configs[j],i,"ridge"] = jridge[[3]]
			

		}

		#Super-detection based on threshold
		for(j in 1:length(obs_configs)){
		for(k in 1:3){
		for(l in 1:5){

			jl1 = threshold_detection(coef_df[,,i,"lasso"], obs_configs[j], 
				thres_array[k], real_sup = x_truth[super_points,], perc=perc_array[l])
			jr1 = threshold_detection(coef_df[,,i,"ridge"], obs_configs[j], 
				thres_array[k], real_sup = x_truth[super_points,], perc=perc_array[l])

			pred_df[,j,i,1,k,l] = jl1
			pred_df[,j,i,2,k,l] = jr1

		}
		}
		}

	print(paste("random iteration", seed_array[i], "done"))


	#Save output
	saveRDS(pred_df, paste("sep_pred_df",seed_len,"_",nsub_sel,".rds",sep=""))
	saveRDS(coef_df, paste("sep_coef_df",seed_len,"_",nsub_sel,".rds",sep=""))

}
