Nelder-Mead methodをRで実装した

最適化理論の課題提出期限が過ぎたと思うので、うpします。この辺の軌跡を書くためのコードです。
www.yasuhisay.info
Nelder-Mead methodがどんな手法の説明はこの辺に書いている。一行で要約すると、ヘッセ行列、勾配ベクトルすら使わずに関数値のみで動いていく最適化手法で変数が非常に多いときなどに威力を発揮するような手法です。

課題では(図示するために)2変数の最適化問題となっていますが、コード自体はさらなる多変数でも動くように作ってます。まあ、結局optim関数で実装されているので実際に使うことはないと思うけど、自分でいじりたくなったときのためということで。あと、パラメータを変えたときにTeXをちまちま編集するのが面倒だったので、RでTeXを吐いてincludeするようにしているなど。収束判定は結構微妙なところがあり、論文にもいろいろ書いてあったが面倒だってので定数回のiterationで打ち切てます。

objective_function <- function(x) {
  y <- x[2]; x <- x[1]
  100 * (y - x^2)^2 + (1 - x)^2
}

init <- function(f, x) { # x_h、x_s、x_l、x_gを返す関数
  list_ave <- function(l) {
    Reduce("+", l, rep(0, length(l[[1]]))) / length(l)
  }
  value <- unlist(lapply(x, function(x) do.call(f, list(x))))
  x_h <- x[[which.max(value)]] # 目的関数が最大の点 x_h
  x_s <- x[[tail(order(value), n=2)[1]]] # 目的関数が二番目に大きい点 x_s
  x_l <- x[[which.min(value)]] # 目的関数が最小の点 x_l
  x[[which.max(value)]] <- NULL
  x_g <- list_ave(x)
  return(list(x_h = x_h, x_s = x_s, x_l = x_l, x_g = x_g))
}

reflection <- function(x_g, x_h, alpha) { # 反射
  return((1 + alpha) * x_g - alpha * x_h) # x_r
}

expansion <- function(x_r, x_g, gamma) { # 拡大
  return(gamma * x_r + (1 - gamma) * x_g) # x_e    
}

contraction <- function(x_h, x_g, beta) { # 縮小
  return(beta * x_h + (1 - beta) * x_g) # x_c
}

reduction <- function(l, x_l) { # 収縮
  # xはlist、x_lはvector
  lapply(l, function(x) (x + x_l)/ 2)
}

simplex_method <- function(f, init_list, alpha = 1, beta = 0.5, gamma = 2, delta = 2,
                           max_iteration = 30) {

  x <- y <- seq(-1.5, 1.5, length.out=50)
  z <- matrix(0, length(x), length(y))
  
  for(i in 1:length(x)) {
    for(j in 1:length(y)) {
      z[i, j] <- objective_function(c(x[i], y[j]))
    }
  }
  
  png(paste("alpha.", alpha, "beta.", beta, "gamma.", gamma, ".png", sep=""))
  
  contour(x, y, z,
          main=paste("alpha = ", alpha, ", beta = ", beta, ", gamma = ", gamma),
          method = "simple",
          nlevels = 30,
          drawlabels = FALSE)
  
  # must define the variables here
  x_h <- x_s <- x_l <- x_g <- rep(0, length(init_list[[1]]))
  
  min_value_history <- c()
  
  iteration <- 0
##   while(delta > 2 * (abs(do.call(f, list(x_h)) - do.call(f, list(x_l)))) 
##         / (abs(do.call(f, list(x_h))) + abs(do.call(f, list(x_l)))) &&
##         iteration < max_iteration) {
  while(iteration < max_iteration) {
    min_value_history <- c(min_value_history, do.call(f, list(x_l)))
    iteration <- iteration + 1
    
    old_min <- do.call(f, list(x_l))
    cat(paste(paste("Iteration", iteration),
              round(do.call(f, list(x_h)), 3), 
              round(do.call(f, list(x_s)), 3),
              round(do.call(f, list(x_l)), 3),
              round(do.call(f, list(x_g)), 3),
              sep=", "), fill=TRUE)
    
    polygon(c(x_h[1], x_s[1], x_l[1]), c(x_h[2], x_s[2], x_l[2]))
    Sys.sleep(1)
    
    # 1 order
    tmp <- init(f, init_list)
    x_h <- tmp$x_h; x_s <- tmp$x_s; x_l <- tmp$x_l;
    # 2 
    x_g <- tmp$x_g

    # 3 reflection
    x_r <- reflection(x_g, x_h, alpha)
    if(do.call(f, list(x_l)) < do.call(f, list(x_r)) &
       do.call(f, list(x_r)) < do.call(f, list(x_s))) { 
      x_h <- x_r
      init_list <- list(x_h, x_s, x_l)
      next
    }

    # 4 expansion
    if(do.call(f, list(x_r)) < do.call(f, list(x_l))) { 
      x_e <- expansion(x_r, x_g, gamma)
      if(do.call(f, list(x_e)) < do.call(f, list(x_r))) {
        x_h <- x_e

      } else {
        x_h <- x_r
      }
      init_list <- list(x_h, x_s, x_l)
      next
    }
    
    # 5 contraction
    if(do.call(f, list(x_h)) < do.call(f, list(x_r))) { 
      x_c <- contraction(x_h, x_g, beta)
      if(do.call(f, list(x_c)) < do.call(f, list(x_h))) {
        x_h <- x_c
        init_list <- list(x_h, x_s, x_l)
        next
      }
    }

    # 6 reduction
    init_list <- reduction(init_list, x_l)

  }
  dev.off()
  return(list(x_l = x_l, alpha = alpha, beta = beta, gamma = gamma,
              min_value_history = min_value_history))
}

plot.iteration <- function(result) {
  optimal <- result$x_l
  iteration <- result$min_value_history
  alpha <- result$alpha
  beta <- result$beta
  gamma <- result$gamma
  png(paste("Iteration.alpha.", alpha, "beta.", beta, "gamma.", gamma, ".png", sep=""))
  plot(iteration, type="l", xlab="Iteration", ylab="Value of Functioon", 
       main=paste("Optimal Point : (", round(optimal[1], 3), ", ", round(optimal[2], 3), ")\n",
         "alpha = ", alpha, ", beta = ", beta, ", gamma = ", gamma))
  dev.off()
}

write.tex <- function(result, target_param) {
  alpha <- result$alpha
  beta <- result$beta
  gamma <- result$gamma
  
  current_dir <- getwd()
  setwd("../")
  param <- paste("alpha.", alpha, "beta.", beta, "gamma.", gamma, sep="")
  cat(
"\\begin{figure}[htbp]
 \\begin{tabular}{cc}
  \\begin{minipage}{0.5\\hsize}
   \\begin{center}
    \\includegraphics[scale=0.5]{./img/", param, ".eps}
    \\caption{simplexの列}
    \\label{", paste(target_param, ".", param, sep=""), "}
   \\end{center}
  \\end{minipage}
  \\begin{minipage}{0.5\\hsize}
   \\begin{center}
    \\includegraphics[scale=0.5]{./img/Iteration.", param, ".eps}
    \\caption{最適解への収束の様子}
    \\label{", paste(target_param, ".Iteration.", param, sep=""), "}
   \\end{center}
  \\end{minipage}
 \\end{tabular}
\\end{figure}",
      file=paste(target_param, ".", param, ".tex", sep=""), sep="")
  setwd(current_dir)
}

warnings()

getwd()
setwd("/Users/yasuhisa/optimization_theory/final/img")

# change alpha
alpha <- c(0.3, 1, 4)
for(a in alpha){
  result <- simplex_method(objective_function,
                           list(c(0.5, -2), c(1.2, 1.5), c(-1.2, 1)),
                           alpha = a, beta = 0.5, gamma = 2,
                           delta = 2)
  plot.iteration(result)
  write.tex(result, "alpha")
}

# change beta
beta <- c(0.1, 0.5, 0.9)
for(b in beta){
  result <- simplex_method(objective_function,
                           list(c(0.5, -2), c(1.2, 1.5), c(-1.2, 1)),
                           alpha = 1, beta = b, gamma = 2,
                           delta = 2)
  plot.iteration(result)
  write.tex(result, "beta")
}

# change gamma
gamma <- c(0.5, 2, 10)
for(g in gamma){
  result <- simplex_method(objective_function,
                           list(c(0.5, -2), c(1.2, 1.5), c(-1.2, 1)),
                           alpha = 1, beta = 0.5, gamma = g,
                           delta = 2)
  plot.iteration(result)
  write.tex(result, "gamma")
}

これなら分かる最適化数学―基礎原理から計算手法まで

これなら分かる最適化数学―基礎原理から計算手法まで