knitr::opts_chunk$set(cache = TRUE)
suppressMessages(library(tidyverse))
suppressMessages(library(splines))
set.seed(271828)
Since the \(V_i\) are independent with mean \(0\) and variance \(1\), the mixed fourth moments are \[ \mathbb{E}[V_iV_jV_kV_l] = \begin{cases} \mathbb{E}[V_i^4] & i=j=k=l \\ 1 & \text{the indices form two distinct pairs, e.g. } i=j \neq k=l \\ 0 & \text{otherwise} \end{cases} \]
Combining the three pairing patterns in the expansion of \((V^TAV)^2\), we get:
\[ \mathbb{E}[(V^TAV)^2] = \sum_{i=1}^na_{ii}^2\,\mathbb{E}(V_i^4) + C \] where \(C\) collects the paired-index terms and depends only on \(A\), not on the distribution of \(V\). Then note that, since \(\mathbb{E}(V_i^2) = 1\):
\[ \mathbb{E}(V_i^4) = \mathrm{Var}(V_i^2) + \left(\mathbb{E}(V_i^2)\right)^2 = \mathrm{Var}(V_i^2) + 1 \] So minimizing the variance of the estimator amounts to driving \(\mathrm{Var}(V_i^2)\) to its smallest possible value, \(0\).
For the distribution provided, \(V_i = \pm 1\) each with probability \(\frac{1}{2}\), so \(V_i^2 = 1\) almost surely and:
\[ \mathrm{Var}(V_i^2) = \mathbb{E}(V_i^4) - \mathbb{E}(V_i^2)^2 = \left(\tfrac{1}{2} + \tfrac{1}{2}\right) - \left(\tfrac{1}{2}+\tfrac{1}{2}\right)^2 = 1-1 = 0 \]
Hence taking the entries of \(V\) to be \(\pm1\) with probability \(\frac{1}{2}\) (the Rademacher distribution) minimizes \(\mathrm{Var}(\widehat{\mathrm{tr}}(A))\).
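To make this concrete, here is a minimal sketch of Hutchinson trace estimation with Rademacher probes; the test matrix and the probe count are arbitrary illustration choices, not part of the exercise:
# Minimal sketch: Hutchinson trace estimation with Rademacher probes.
# The test matrix A_test and probe count k are arbitrary illustration choices.
hutchinson <- function(A, k = 1000) {
  n <- nrow(A)
  est <- numeric(k)
  for (i in 1:k) {
    v <- sample(c(-1, 1), n, replace = TRUE)  # Rademacher probe
    est[i] <- sum(v * (A %*% v))              # v^T A v
  }
  mean(est)
}
A_test <- crossprod(matrix(rnorm(50 * 5), 50, 5))  # a 5 x 5 PSD matrix
c(estimate = hutchinson(A_test), exact = sum(diag(A_test)))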
Writing \(H\) in block form, the base case \(k = 1\) is immediate:
\[ H \left[ \begin{array}{c} V \\ 0 \end{array} \right] = \left[ \begin{array} {cc} H_{11} & H_{12} \\ H_{21} & H_{22} \end{array} \right] \left[ \begin{array}{c} V \\ 0 \end{array} \right] = \left[ \begin{array}{c} H_{11}V \\ H_{21}V \end{array} \right] \]
For general \(k\), the claim is:
\[ H\left[ \begin{array}{c} H_{11}^{k-1}V \\ 0 \end{array} \right] = \left[ \begin{array} {cc} H_{11} & H_{12} \\ H_{21} & H_{22} \end{array} \right] \left[ \begin{array}{c} H_{11}^{k-1}V \\ 0 \end{array} \right] = \left[ \begin{array}{c} H_{11}^{k}V \\ H_{21}H_{11}^{k-1}V \end{array} \right] \]
This clearly holds for \(k = 2\). Assuming it holds for \(k\), it also holds for \(k+1\):
\[ H\left[ \begin{array}{c} H_{11}^{k}V \\ 0 \end{array} \right] = \left[ \begin{array} {cc} H_{11} & H_{12} \\ H_{21} & H_{22} \end{array} \right] \left[ \begin{array}{c} H_{11}^{k}V \\ 0 \end{array} \right] = \left[ \begin{array}{c} H_{11}^{k+1}V \\ H_{21}H_{11}^{k}V \end{array} \right] \]
By induction the claim holds for every \(k\): zeroing the second block and reapplying \(H\) produces \(H_{11}^{k}V\) in the first block after \(k\) applications.
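This identity is easy to check numerically; the sketch below uses an arbitrary small design matrix and subset w, both illustration choices:
# Sketch: zeroing the complement of w between applications of the hat matrix
# reproduces powers of the submatrix H11 = H[w, w]. The design matrix and
# subset w are arbitrary illustration choices.
X <- cbind(1, matrix(rnorm(30 * 3), 30, 3))
H <- X %*% solve(crossprod(X), t(X))  # hat matrix
w <- 1:10
v <- sample(c(-1, 1), length(w), replace = TRUE)
u <- rep(0, nrow(X))
u[w] <- v
for (k in 1:3) {
  u <- drop(H %*% u)  # apply H
  u[-w] <- 0          # zero the second block
}
H11 <- H[w, w]
max(abs(u[w] - drop(H11 %*% H11 %*% H11 %*% v)))  # ~ 0 up to rounding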
leverage <- function(x, y, w, r = 10, k = 100) {
  # QR factorizations of the two design matrices
  qr_x <- qr(x)
  qr_y <- qr(y)
  # Number of observations
  n <- nrow(x)
  # Storage for the k Monte Carlo replicates
  levx <- numeric(k)
  levy <- numeric(k)
  for (i in 1:k) {
    # Rademacher probe vector, restricted to the subset w
    v <- ifelse(runif(n) > 0.5, 1, -1)
    v[-w] <- 0
    # j = 1 term of the series: H v
    v0 <- qr.fitted(qr_x, v)
    v1 <- qr.fitted(qr_y, v)
    f <- v0
    g <- v1
    # Accumulate the truncated series sum_{j=1}^r H_11^j v / j,
    # zeroing the entries outside w before each application of H
    for (j in 2:r) {
      v0[-w] <- 0
      v1[-w] <- 0
      v0 <- qr.fitted(qr_x, v0)
      v1 <- qr.fitted(qr_y, v1)
      f <- f + v0/j
      g <- g + v1/j
    }
    # Hutchinson estimates of sum_{j=1}^r tr(H_11^j) / j
    levx[i] <- sum(v*f)
    levy[i] <- sum(v*g)
  }
  # Delta-method standard errors for the transformation 1 - exp(-t)
  std.err.x <- exp(-mean(levx)) * sd(levx)/sqrt(k)
  std.err.y <- exp(-mean(levy)) * sd(levy)/sqrt(k)
  levx <- 1 - exp(-mean(levx))
  levy <- 1 - exp(-mean(levy))
  list(levx = levx, std.err.x = std.err.x,
       levy = levy, std.err.y = std.err.y)
}
# Grid of 1000 points in (0, 1]
x <- (1:1000)/1000
# Degree-10 polynomial design with an intercept
X1 <- 1
for (i in 1:10) {
  X1 <- cbind(X1, x^i)
}
# B-spline design with 10 degrees of freedom, plus an intercept
X2 <- cbind(1, bs(x, df = 10))
leverages <- leverage(X1, X2, 1:100)
cat(" The leverages are", leverages$levx, "and", leverages$levy, "\n",
"The difference is", leverages$levx - leverages$levy, "\n",
"The larger leverage is for the design: ", if_else(leverages$levx > leverages$levy,
true = "Normal (non-splines) Design",
false = "Splines Design"))
## The leverages are 0.9968716 and 0.9944505
## The difference is 0.002421114
## The larger leverage is for the design: Normal (non-splines) Design
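Since n is small enough here to form the hat matrices explicitly, the plug-in target of the estimator, \(1 - \exp\left(-\sum_{j=1}^r \mathrm{tr}(H_{11}^j)/j\right)\), can also be computed exactly as a cross-check. The helper exact_lev below is a hypothetical sketch re-using X1, X2, and the subset above:
# Sketch: exact value of the truncated quantity that the stochastic
# estimator approximates. exact_lev is a hypothetical helper for this check.
exact_lev <- function(X, w, r = 10) {
  H <- qr.fitted(qr(X), diag(nrow(X)))  # hat matrix via QR
  H11 <- H[w, w]
  Hj <- diag(length(w))
  total <- 0
  for (j in 1:r) {
    Hj <- Hj %*% H11                    # H11^j
    total <- total + sum(diag(Hj))/j    # tr(H11^j) / j
  }
  1 - exp(-total)
}
c(exact_x = exact_lev(X1, 1:100), exact_y = exact_lev(X2, 1:100))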
Method of moments estimates:
\[ \mu = \frac{\alpha}{\lambda}, \qquad \sigma^2 = \frac{\alpha}{\lambda^2} \quad\Rightarrow\quad \lambda = \frac{\mu}{\sigma^2} \] Then by substituting \(\lambda\) into the equation for \(\sigma^2\) we get: \[ \alpha = \frac{\mu^2}{\sigma^2} \]
Replacing \(\mu\) and \(\sigma^2\) with the sample mean \(\bar{X}\) and sample variance \(S^2\) gives the method-of-moments estimates:
\[ \alpha = \frac{\bar{X}^2}{S^2}, \qquad \lambda = \frac{\bar{X}}{S^2} \]
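As a quick sanity check of these formulas (illustration only; the true shape 2 and rate 3 below are arbitrary choices):
# Sketch: method-of-moments estimates on simulated gamma data.
# True shape 2 and rate 3 are arbitrary illustration choices.
z <- rgamma(10000, shape = 2, rate = 3)
c(alpha_mom = mean(z)^2/var(z),  # should be near 2
  lambda_mom = mean(z)/var(z))   # should be near 3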
\[ L(\alpha, \lambda) = \prod_{i=1}^n{\frac{\lambda^\alpha x_i^{\alpha-1}e^{-\lambda x_i}}{\Gamma(\alpha)}} \]
\[ l(\alpha, \lambda) = n\alpha \log(\lambda) + (\alpha - 1)\sum_{i=1}^n{\log(x_i)} - \lambda\sum_{i=1}^nx_i-n \log(\Gamma(\alpha)) \]
\[\frac{\partial l}{\partial \alpha} = n \log(\lambda) +\sum_{i=1}^n \log(x_i) - n\psi(\alpha)\]
\[\frac{\partial l}{\partial \lambda} = \frac{n \alpha}{\lambda} - \sum_{i=1}^n{x_i}\]
\[ \frac{\partial^2 l}{\partial \alpha^2} = -n \psi_1(\alpha), \qquad \frac{\partial^2 l}{\partial \lambda^2} = -\frac{n \alpha}{\lambda^2}, \qquad \frac{\partial^2l}{\partial \alpha \,\partial\lambda} = \frac{n}{\lambda} \]
Where \(\psi\) is the digamma and \(\psi_1\) is the trigamma function.
Then the score is:
\[ \left( \begin{array} {cc} \sum_{i=1}^n \log(x_i) + n(\log(\lambda) - \psi(\alpha))\\ \frac{n \alpha}{\lambda} - \sum_{i=1}^nx_i \end{array} \right) \]
And the Fisher information matrix is:
\[ \left[ \begin{array} {cc} n\psi_1(\alpha) & -\frac{n}{\lambda} \\ -\frac{n}{\lambda} & \frac{n \alpha}{\lambda^2} \end{array} \right] \]
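Before coding the full iteration, a finite-difference comparison against the log-likelihood is a cheap way to catch sign errors in the analytic score (a sketch; the simulated sample and the evaluation point \((\alpha, \lambda) = (1.5, 2)\) are arbitrary illustration choices):
# Sketch: check the analytic score against a numerical gradient of the
# log-likelihood. The sample and evaluation point are arbitrary choices.
z <- rgamma(500, shape = 1.5, rate = 2)
n <- length(z)
loglik <- function(a, l) {
  n*a*log(l) + (a - 1)*sum(log(z)) - l*sum(z) - n*lgamma(a)
}
a <- 1.5; l <- 2; h <- 1e-6
c(analytic_alpha  = sum(log(z)) + n*(log(l) - digamma(a)),
  numeric_alpha   = (loglik(a + h, l) - loglik(a - h, l))/(2*h),
  analytic_lambda = n*a/l - sum(z),
  numeric_lambda  = (loglik(a, l + h) - loglik(a, l - h))/(2*h))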
gmle <- function(data, epsilon = 10^-6){
  # Check that data is a non-empty vector
  if (!is.vector(data)) stop("data needs to be a vector")
  n <- length(data)
  if (n == 0) stop("data vector cannot be empty")
  # Score vector at params = c(alpha, lambda)
  score_fn <- function(params) {
    c(sum(log(data)) + n*(log(params[2]) - digamma(params[1])),
      n*(params[1]/params[2]) - sum(data))
  }
  # Fisher information matrix at params = c(alpha, lambda)
  info_fn <- function(params) {
    matrix(data = c(n*trigamma(params[1]), -n/params[2],
                    -n/params[2], n*params[1]/(params[2])^2),
           nrow = 2, ncol = 2, byrow = TRUE)
  }
  # Start from the method-of-moments estimates
  params <- c((mean(data))^2/var(data), mean(data)/var(data))
  score <- score_fn(params)
  info <- info_fn(params)
  iter <- 0
  # Fisher scoring: params <- params + I(params)^{-1} score
  while (max(abs(score)) > epsilon) {
    params <- params + solve(info, score)
    score <- score_fn(params)
    info <- info_fn(params)
    iter <- iter + 1
    if (iter > 10000) stop("iteration limit reached at 10000")
  }
  # Asymptotic variance-covariance matrix: inverse Fisher information
  final <- list(mle = params, varcov = solve(info), iter = iter)
  return(final)
}
Now let’s test the function:
# Simulate eight gamma samples, each with a unique shape and scale combination
testing_data <- tibble(A = rgamma(1000, shape = 0.5, scale = 1),
                       B = rgamma(1000, shape = 0.3, scale = 1),
                       C = rgamma(1000, shape = 0.1, scale = 1),
                       D = rgamma(1000, shape = 1, scale = 1),
                       E = rgamma(1000, shape = 1, scale = 0.5),
                       `F` = rgamma(1000, shape = 1, scale = 0.3),
                       G = rgamma(1000, shape = 1, scale = 0.1),
                       H = rgamma(1000, shape = 0.1, scale = 0.1))
write_csv(x = testing_data, file = "Testing_Data.csv")
A <- gmle(testing_data$A)$mle
B <- gmle(testing_data$B)$mle
C <- gmle(testing_data$C)$mle
D <- gmle(testing_data$D)$mle
E <- gmle(testing_data$E)$mle
`F` <- gmle(testing_data$`F`)$mle
G <- gmle(testing_data$G)$mle
H <- gmle(testing_data$H)$mle
results <- as_tibble(x = list(A = A, B = B, C = C, D = D,
                              E = E, `F` = `F`, G = G, H = H))
results
The estimates line up well with the true parameters. Note that gmle returns the rate \(\lambda = 1/\text{scale}\), since rgamma above is parametrized by scale; for example, column E (shape 1, scale 0.5) should give \(\hat{\lambda} \approx 2\).
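As an external cross-check, the results can be compared with MASS::fitdistr, which fits the same shape/rate parametrization by maximum likelihood (a sketch; it assumes the MASS package is installed):
# Sketch: cross-check gmle against MASS::fitdistr on one column.
# Assumes the MASS package is installed; fitdistr returns (shape, rate).
fd <- MASS::fitdistr(testing_data$A, densfun = "gamma")
rbind(gmle = gmle(testing_data$A)$mle, fitdistr = fd$estimate)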