knitr::opts_chunk$set(cache = TRUE)
suppressMessages(library(tidyverse))
suppressMessages(library(splines))
set.seed(271828)

Question 1

a)

Since the \(V_i\) are independent with mean \(0\) and variance \(1\), the mixed fourth moments are \[ \mathbb{E}[V_iV_jV_kV_l] = \left\{ \begin{array}{ll} \mathbb{E}(V_i^4) & i=j=k=l \\ 1 & \text{two distinct pairs, e.g. } i=j \neq k=l \\ 0 & \text{otherwise} \end{array} \right. \]

Combining the three cases, only the \(i=j=k=l\) terms depend on the distribution of \(V\) beyond its first two moments:

\[ \mathbb{E}[(V^TAV)^2] = \sum_{i=1}^n a_{ii}^2\,\mathbb{E}(V_i^4) + C \]

where \(C\) collects the paired-index terms \(a_{ii}a_{jj}\) and \(a_{ij}^2\) (\(i \neq j\)) and is the same for every \(V\) with independent mean-\(0\), variance-\(1\) entries. Then note that:

\[ \mathbb{E}(V_i^4) = Var(V_i^2) + \left(\mathbb{E}(V_i^2)\right)^2 = Var(V_i^2) + 1 \] Since \(Var(V_i^2) \geq 0\), the variance of the estimator is minimized by any distribution that achieves \(Var(V_i^2) = 0\).

For the Rademacher choice \(P(V_i = 1) = P(V_i = -1) = \frac{1}{2}\), we have \(V_i^2 = 1\) always, so:

\[ Var(V_i^2) = E(V_i^4) - E(V_i^2)^2 = \left(\frac{1}{2} + \frac{1}{2}\right) - \left(\frac{1}{2}+\frac{1}{2}\right)^2 = 1-1=0 \]

Which means taking the entries of \(V\) to be \(\pm1\) with probability \(\frac{1}{2}\) each minimizes \(Var(\hat{tr}(A))\), where \(\hat{tr}(A) = V^TAV\).
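As an illustration, here is a minimal sketch of the resulting estimator; the test matrix `A` and the number of probe vectors `k` are arbitrary choices, not part of the problem.

# Hutchinson-style trace estimator with Rademacher probe vectors
hutchinson_trace <- function(A, k = 1000) {
    n <- nrow(A)
    ests <- replicate(k, {
        v <- sample(c(-1, 1), n, replace = TRUE)  # entries +/- 1 w.p. 1/2
        sum(v * (A %*% v))                        # one draw of V^T A V
    })
    c(estimate = mean(ests), std.err = sd(ests)/sqrt(k))
}

A <- crossprod(matrix(rnorm(200), 20, 10))        # arbitrary 10 x 10 test matrix
hutchinson_trace(A)
sum(diag(A))                                      # exact trace for comparison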

b)

\[ H \left[ \begin{array}{c} V \\ 0 \end{array} \right] = \left[ \begin{array} {cc} H_{11} & H_{12} \\ H_{21} & H_{22} \end{array} \right] \left[ \begin{array}{c} V \\ 0 \end{array} \right] = \left[ \begin{array}{c} H_{11}V \\ H_{21}V \end{array} \right] \]

More generally, if the top block holds \(H_{11}^{k-1}V\) and the bottom block is set to \(0\):

\[ H\left[ \begin{array}{c} H_{11}^{k-1}V \\ 0 \end{array} \right] = \left[ \begin{array} {cc} H_{11} & H_{12} \\ H_{21} & H_{22} \end{array} \right] \left[ \begin{array}{c} H_{11}^{k-1}V \\ 0 \end{array} \right] = \left[ \begin{array}{c} H_{11}^{k}V \\ H_{21}H_{11}^{k-1}V \end{array} \right] \]

The base case \(k=1\) is the first display above. Assuming the top block equals \(H_{11}^{k}V\) after \(k\) applications, zeroing the bottom block and applying \(H\) once more gives:

\[ H\left[ \begin{array}{c} H_{11}^{k}V \\ 0 \end{array} \right] = \left[ \begin{array} {cc} H_{11} & H_{12} \\ H_{21} & H_{22} \end{array} \right] \left[ \begin{array}{c} H_{11}^{k}V \\ 0 \end{array} \right] = \left[ \begin{array}{c} H_{11}^{k+1}V \\ H_{21}H_{11}^{k}V \end{array} \right] \]

So by induction the top block is \(H_{11}^{k}V\) for every \(k \geq 1\).
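A quick numerical check of this identity (the matrix `H`, the block size `p`, and the number of applications are arbitrary choices for illustration):

n <- 6; p <- 3
H <- matrix(rnorm(n*n), n, n)     # any n x n matrix works
H11 <- H[1:p, 1:p]
V <- rnorm(p)
u <- c(V, rep(0, n - p))
for (k in 1:4) {
    u[-(1:p)] <- 0                # zero the lower block before each application
    u <- as.vector(H %*% u)       # top block is now H11^k V
}
H11_4 <- H11 %*% H11 %*% H11 %*% H11
all.equal(u[1:p], as.vector(H11_4 %*% V))   # should print TRUE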

c)

leverage <- function(x, y, w, r = 10, k = 100) {
    # QR factorizations of the two design matrices
    qr_x <- qr(x)
    qr_y <- qr(y)
    # Store the number of rows
    n <- nrow(x)
    # Storage for the k Monte Carlo replicates
    levx <- NULL
    levy <- NULL
    # Monte Carlo loop: k independent Rademacher probe vectors,
    # supported on the rows indexed by w
    for (i in 1:k) {
        v <- ifelse(runif(n) > 0.5, 1, -1)
        v[-w] <- 0
        v0 <- qr.fitted(qr_x, v)
        v1 <- qr.fitted(qr_y, v)
        f <- v0
        g <- v1
        # Accumulate sum_{j=1}^r H_11^j v / j, zeroing the complement
        # of w between applications of the hat matrix (part (b))
        for (j in 2:r) {
            v0[-w] <- 0
            v1[-w] <- 0
            v0 <- qr.fitted(qr_x, v0)
            v1 <- qr.fitted(qr_y, v1)
            f <- f + v0/j
            g <- g + v1/j
        }
        levx <- c(levx, sum(v*f))
        levy <- c(levy, sum(v*g))
    }
    # Delta-method standard errors for the transformation 1 - exp(-mean)
    std.err.x <- exp(-mean(levx)) * sd(levx)/sqrt(k)
    std.err.y <- exp(-mean(levy)) * sd(levy)/sqrt(k)
    levx <- 1 - exp(-mean(levx))
    levy <- 1 - exp(-mean(levy))
    list(levx = levx, std.err.x = std.err.x,
         levy = levy, std.err.y = std.err.y)
}
# Grid on (0, 1] and a degree-10 polynomial design matrix
x <- (1:1000)/1000
X1 <- 1
for (i in 1:10) {
    X1 <- cbind(X1, x^i)
}

X2 <- cbind(1, bs(x, df = 10))   # B-spline design with 10 df, plus an intercept

leverages <- leverage(X1, X2, 1:100)

cat(" The leverages are", leverages$levx, "and", leverages$levy, "\n",
    "The difference is", leverages$levx - leverages$levy, "\n",
    "The larger leverage is for the design: ", if_else(leverages$levx > leverages$levy,
                                                       true = "Normal (non-splines) Design",
                                                       false = "Splines Design"))
##  The leverages are 0.9968716 and 0.9944505 
##  The difference is 0.002421114 
##  The larger leverage is for the design:  Normal (non-splines) Design
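Since \(n = 1000\) is small enough to form the full hat matrices, we can sanity-check the randomized answers against the exact value of the same truncated series; `exact_value` below is a helper added for this check only.

exact_value <- function(X, w, r = 10) {
    n <- nrow(X)
    H <- qr.fitted(qr(X), diag(n))    # full hat matrix via the same QR route
    H11 <- H[w, w]
    Hpow <- H11
    total <- sum(diag(H11))           # j = 1 term of sum_{j <= r} tr(H11^j)/j
    for (j in 2:r) {
        Hpow <- Hpow %*% H11
        total <- total + sum(diag(Hpow))/j
    }
    1 - exp(-total)                   # same transformation as in leverage()
}

c(exact_value(X1, 1:100), exact_value(X2, 1:100))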

Question 2

a)

Method of moments estimates:

\[ \mu = \frac{\alpha}{\lambda}, \qquad \sigma^2 = \frac{\alpha}{\lambda^2} \Rightarrow \lambda = \frac{\mu}{\sigma^2} \] Then by substituting into the equation for \(\sigma^2\) we get: \[ \alpha = \frac{\mu^2}{\sigma^2} \]

Now using the sample mean \(\bar{X}\) and sample variance \(S^2\) in place of \(\mu\) and \(\sigma^2\):

\[ \hat{\alpha} = \frac{\bar{X}^2}{S^2}, \qquad \hat{\lambda} = \frac{\bar{X}}{S^2} \]
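These two lines implement the estimators directly (a sketch; `gamma_mom` is an illustrative name, and `var()` uses the usual \(n-1\) denominator for \(S^2\)):

gamma_mom <- function(x) c(alpha = mean(x)^2/var(x), lambda = mean(x)/var(x))
gamma_mom(rgamma(1000, shape = 2, rate = 3))   # should be close to (2, 3)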

b)

\[ L(\alpha, \lambda) = \prod_{i=1}^n{\frac{\lambda^\alpha x_i^{\alpha-1}e^{-\lambda x_i}}{\Gamma(\alpha)}} \]

\[ l(\alpha, \lambda) = n\alpha \log(\lambda) + (\alpha - 1)\sum_{i=1}^n{\log(x_i)} - \lambda\sum_{i=1}^n x_i - n \log(\Gamma(\alpha)) \]

\[\frac{\partial l}{\partial \alpha} = n \log(\lambda) + \sum_{i=1}^n \log(x_i) - n\psi(\alpha)\]

\[\frac{\partial l}{\partial \lambda} = \frac{n \alpha}{\lambda} - \sum_{i=1}^n{x_i}\]

\[ \frac{\partial^2 l}{\partial \alpha^2} = -n \psi_1(\alpha), \qquad \frac{\partial^2 l}{\partial \lambda^2} = -\frac{n \alpha}{\lambda^2}, \qquad \frac{\partial^2 l}{\partial \alpha \, \partial \lambda} = \frac{n}{\lambda} \]

Where \(\psi\) is the digamma and \(\psi_1\) is the trigamma function.

Then the score is:

\[ \left( \begin{array} {cc} \sum_{i=1}^n \log(x_i) + n(\log(\lambda) - \psi(\alpha))\\ \frac{n \alpha}{\lambda} - \sum_{i=1}^nx_i \end{array} \right) \]

And the Fisher information matrix is:

\[ \left[ \begin{array} {cc} n\psi_1(\alpha) & -\frac{n}{\lambda} \\ -\frac{n}{\lambda} & \frac{n \alpha}{\lambda^2} \end{array} \right] \]
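The implementation iterates Fisher scoring, i.e. Newton's method with the Fisher information in place of the negative Hessian (the two coincide here, since none of the second derivatives involve the data):

\[ \left( \begin{array}{c} \alpha_{t+1} \\ \lambda_{t+1} \end{array} \right) = \left( \begin{array}{c} \alpha_{t} \\ \lambda_{t} \end{array} \right) + I(\alpha_t, \lambda_t)^{-1} s(\alpha_t, \lambda_t) \]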

Create the function

gmle <- function(data, epsilon = 10^-6){
    # Check that data is a numeric vector
    if (!is.numeric(data) || !is.vector(data)) stop("data needs to be a numeric vector")
    
    # Get the length of data
    n <- length(data)
    
    # Check that the data are usable: non-empty and strictly positive
    # (the log-likelihood involves log(x_i))
    if (n == 0) stop("data vector cannot be empty")
    if (any(data <= 0)) stop("data must be strictly positive")
    
    # Start with MoM estimates:
    alpha_start <- (mean(data))^2/(var(data))
    lambda_start <- mean(data)/var(data)
    
    # Initialize the iterates and put them in a vector:
    alpha <- alpha_start
    lambda <- lambda_start
    params <- c(alpha, lambda)
    
    # Define the score
    score1 <- sum(log(data)) + n*(log(params[2]) - digamma(params[1]))
    score2 <- n*(params[1]/params[2]) - sum(data)
    score <- c(score1, score2)
    
    # Define the Hessian (Fisher Info)
    hessian11 <- n*trigamma(params[1])
    hessian12 <- -n/params[2]
    hessian22 <- n*params[1]/(params[2])^2
    
    hessian <- matrix(data = c(hessian11, hessian12,
                               hessian12, hessian22), 
                      nrow = 2, ncol = 2, byrow = TRUE)
    
    # Define the iteration variable
    iter <- 0
    
    while (max(abs(score)) > epsilon) {
        # Solve for new params
        params_new <- params + solve(hessian, score)
        
        # Store new params
        alpha_new <- params_new[1]
        lambda_new <- params_new[2]
        
        # Compute new Score
        score1_new <- sum(log(data)) + n*(log(params_new[2]) - digamma(params_new[1]))
        score2_new <- n*(params_new[1]/params_new[2]) - sum(data)
        score_new <- c(score1_new, score2_new)
        
        # Compute new Hessian
        hessian11_new <- n*trigamma(params_new[1])
        hessian12_new <- -n/params_new[2]
        hessian22_new <- n*params_new[1]/(params_new[2])^2
        hessian_new <- matrix(data = c(hessian11_new, hessian12_new,
                                       hessian12_new, hessian22_new), 
                      nrow = 2, ncol = 2, byrow = TRUE)
        
        # Overwrite the old variables
        params <- params_new
        score <- score_new
        hessian <- hessian_new
        
        # Add the iterator
        iter <- iter + 1
        
        if (iter > 10000) stop("iteration limit reached at 10000")
        
    }
    final <- list(mle = params, varcov = solve(hessian), iter = iter)
    return(final)
}

Now let’s test the function:

# Create a tibble with 8 columns, each rgamma with its own shape and scale combination

testing_data <- tibble(A = rgamma(1000, shape = 0.5, scale = 1),
                       B = rgamma(1000, shape = 0.3, scale = 1),
                       C = rgamma(1000, shape = 0.1, scale = 1),
                       D = rgamma(1000, shape = 1, scale = 1),
                       E = rgamma(1000, shape = 1, scale = 0.5),
                       `F` = rgamma(1000, shape = 1, scale = 0.3),
                       G = rgamma(1000, shape = 1, scale = 0.1),
                       H = rgamma(1000, shape = 0.1, scale = 0.1))
write_csv(testing_data, "Testing_Data.csv")

A <- gmle(testing_data$A)$mle
B <- gmle(testing_data$B)$mle
C <- gmle(testing_data$C)$mle
D <- gmle(testing_data$D)$mle
E <- gmle(testing_data$E)$mle
`F` <- gmle(testing_data$`F`)$mle
G <- gmle(testing_data$G)$mle
H <- gmle(testing_data$H)$mle

results <- as_tibble(x = list(A = A,B = B,C = C,D = D,E = E,`F` = `F`,G = G,H = H))

results

The estimates line up well with the true shape/rate pairs (recalling that rgamma's scale is \(1/\lambda\)), so the implementation looks good.
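As one more cross-check (assuming the MASS package is available), MASS::fitdistr fits the gamma by numerical maximum likelihood in the same shape/rate parameterization, so its estimates should essentially match gmle's; `x_check` is an illustrative sample.

x_check <- rgamma(500, shape = 2, rate = 3)
gmle(x_check)$mle                             # Newton/Fisher-scoring estimates
MASS::fitdistr(x_check, "gamma")$estimate     # should be nearly identical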