Custom Optimizer

In this article, we describe how to build your own custom optimizer in ADCME.

ADCME.CustomOptimizer — Function
CustomOptimizer(opt::Function, name::String)

creates a custom optimizer whose struct name is given by the name argument. For example, we can integrate NLopt.jl with ADCME by constructing a new optimizer:

CustomOptimizer("Con") do f, df, c, dc, x0, x_L, x_U
    opt = Opt(:LD_MMA, length(x0))
    bd = zeros(length(x0)); bd[end-1:end] = [-Inf, 0.0]
    opt.lower_bounds = bd
    opt.xtol_rel = 1e-4
    opt.min_objective = (x,g)->(g[:]= df(x); return f(x)[1])
    inequality_constraint!(opt, (x,g)->( g[:]= dc(x);c(x)[1]), 1e-8)
    (minf,minx,ret) = NLopt.optimize(opt, x0)
    minx
end

Here:

f: a function that returns $f(x)$

df: a function that returns $\nabla f(x)$

c: a function that returns the constraints $c(x)$

dc: a function that returns $\nabla c(x)$

x0: initial guess

nineq: number of inequality constraints

neq: number of equality constraints

x_L: lower bounds of optimizable variables

x_U: upper bounds of optimizable variables

Then we can create an optimizer with

opt = Con(loss, inequalities=[c1], equalities=[c2])

To trigger the optimization, use

minimize(opt, sess)
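
To make the closure interface concrete, below is a minimal sketch that implements plain gradient descent directly inside the closure. The name GD, the step size 0.01, and the iteration count are illustrative assumptions, and the bound and constraint arguments are simply ignored.

using ADCME

GD = CustomOptimizer() do f, df, c, dc, x0, x_L, x_U
    x = copy(x0)
    for i = 1:1000
        x -= 0.01 * df(x)   # fixed-step gradient descent
    end
    x                       # the closure returns the final point
end

reset_default_graph()
v = Variable(rand(5))
loss = sum((v - 1.0)^2)
opt = GD(loss)
sess = Session(); init(sess)
minimize(opt, sess)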

Note that, thanks to Julia's closure semantics, objects such as step_callback and optimizer_kwargs can be captured from the surrounding Julia environment directly (this is exactly how losses, max_iter, and callback are used in the drop-in substitutes below).
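
For instance (a minimal sketch, assuming NLopt's LD_LBFGS algorithm; history, step_callback, and RecordingOpt are illustrative names), a loss history and a callback defined outside the optimizer are captured by the closure:

using ADCME
using NLopt

history = Float64[]                      # defined in the surrounding scope
step_callback = l -> push!(history, l)   # captured by the closure below

RecordingOpt = CustomOptimizer() do f, df, c, dc, x0, x_L, x_U
    opt = Opt(:LD_LBFGS, length(x0))
    opt.min_objective = (x, g) -> begin
        g[:] = df(x)
        l = f(x)[1]
        step_callback(l)                 # uses the captured callback
        return l
    end
    (minf, minx, ret) = NLopt.optimize(opt, x0)
    minx
end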

Below we show a few examples of custom optimizers. These examples can be adapted to your specific applications.

Ipopt Custom Optimizer

For a concrete example, let us consider using Ipopt as a constrained optimization solver.

using Ipopt
using ADCME

IPOPT = CustomOptimizer() do f, df, c, dc, x0, x_L, x_U
    n_variables = length(x0)
    nz = length(dc(x0))      # number of non-zeros in the constraint Jacobian
    m = div(nz, n_variables) # number of constraints
    g_L, g_U = [-Inf;-Inf], [0.0;0.0]
    function eval_jac_g(x, mode, rows, cols, values)
        if mode == :Structure
            # dense 2x2 Jacobian: entries (1,1), (1,2), (2,1), (2,2)
            rows[1] = 1; cols[1] = 1
            rows[2] = 1; cols[2] = 2
            rows[3] = 2; cols[3] = 1
            rows[4] = 2; cols[4] = 2
        else
            values[:] = dc(x)
        end
    end

    nele_hess = 0 # number of non-zeros in the Hessian (unused with the limited-memory approximation)
    prob = Ipopt.createProblem(n_variables, x_L, x_U, m, g_L, g_U, nz, nele_hess,
            f, (x,g)->(g[:]=c(x)), (x,g)->(g[:]=df(x)), eval_jac_g, nothing)
    addOption(prob, "hessian_approximation", "limited-memory")
    addOption(prob, "max_iter", 100)
    addOption(prob, "print_level", 2) # 0 -- 12; the larger the number, the more detailed the output

    prob.x = x0
    status = Ipopt.solveProblem(prob)
    println(Ipopt.ApplicationReturnStatus[status])
    println(prob.x)
    prob.x
end

reset_default_graph() # be sure to reset graph before any optimization
x = Variable([1.0;1.0])
x1 = x[1]; x2 = x[2]; 
loss = x2
g = x1
h = x1*x1 + x2*x2 - 1
opt = IPOPT(loss, inequalities=[g], equalities=[h], var_to_bounds=Dict(x=>(-1.0,1.0)))
sess = Session(); init(sess)
minimize(opt, sess)

Here is a detailed description of the Ipopt.createProblem interface used above:

function createProblem(
  n::Int,                     # Number of variables
  x_L::Vector{Float64},       # Variable lower bounds
  x_U::Vector{Float64},       # Variable upper bounds
  m::Int,                     # Number of constraints
  g_L::Vector{Float64},       # Constraint lower bounds
  g_U::Vector{Float64},       # Constraint upper bounds
  nele_jac::Int,              # Number of non-zeros in Jacobian
  nele_hess::Int,             # Number of non-zeros in Hessian
  eval_f,                     # Callback: objective function
  eval_g,                     # Callback: constraint evaluation
  eval_grad_f,                # Callback: objective function gradient
  eval_jac_g,                 # Callback: Jacobian evaluation
  eval_h = nothing)           # Callback: Hessian evaluation
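
For reference, here is how the Ipopt example above maps onto these arguments (a reading of that code, not additional API):

# n         = 2                          # two variables x1, x2
# m         = 2                          # two constraints: g and h
# g_L, g_U  = [-Inf, -Inf], [0.0, 0.0]   # this wrapper imposes both constraints as c(x) <= 0
# nele_jac  = nz = 4                     # dense 2x2 constraint Jacobian, filled by eval_jac_g
# nele_hess = 0                          # no Hessian entries: eval_h is nothing and
#                                        # "hessian_approximation" is "limited-memory"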

NLopt Custom Optimizer

Here is an example of using NLopt for optimization.

using ADCME
using NLopt

Con = CustomOptimizer() do f, df, c, dc, x0, x_L, x_U
    opt = Opt(:LD_MMA, length(x0))
    opt.upper_bounds = 10ones(length(x0))
    lb = zeros(length(x0)); lb[end-1:end] = [-Inf, 0.0]
    opt.lower_bounds = lb
    opt.xtol_rel = 1e-4
    opt.min_objective = (x, g) -> (g[:] = df(x); return f(x)[1])
    inequality_constraint!(opt, (x, g) -> (g[:] = dc(x); c(x)[1]), 1e-8)
    (minf, minx, ret) = NLopt.optimize(opt, x0)
    minx
end

reset_default_graph() # be sure to reset the graph before any operation
x = Variable([1.234; 5.678])
y = Variable([1.0;2.0])
loss = x[2]^2 + sum(y^2)
c1 = (x[1]-1)^2 - x[2] 
opt = Con(loss, inequalities=[c1])
sess = Session(); init(sess)
minimize(opt, sess)
xmin = run(sess, x) # expected: (1., 0.)

In the closure above, min_objective registers the objective callback: it writes the gradient df(x) into g and returns the loss value f(x)[1]. inequality_constraint! registers the constraint c1 (through c and dc) with tolerance 1e-8, and NLopt.optimize runs the optimization starting from x0. The closure returns the minimizer minx, which ADCME uses to update the variables, so run(sess, x) afterwards returns the optimized values.

Drop-in Substitutes of BFGS!

IPOPT

The following code provides a drop-in substitute for the BFGS! optimizer for unconstrained optimization. Copy and execute it to get access to the IPOPT! function.

using PyCall
using Ipopt
using ADCME


function IPOPT!(sess::PyObject, loss::PyObject, max_iter::Int64=15000; 
            verbose::Int64=0, vars::Array{PyObject}=PyObject[], 
                    callback::Union{Function, Nothing}=nothing, kwargs...)
    losses = Float64[]
    loss_ = 0
    cnt_ = -1
    iter_ = 0
    IPOPT = CustomOptimizer() do f, df, c, dc, x0, x_L, x_U
        n_variables = length(x0)
        nz = length(dc(x0)) 
        m = div(nz, n_variables) # Number of constraints
        # g_L, g_U = [-Inf;-Inf], [0.0;0.0]
        g_L = Float64[]
        g_U = Float64[]
        function eval_jac_g(x, mode, rows, cols, values); end
        function eval_f(x)
          loss_ = f(x)
          iter_ += 1
          if iter_==1
            push!(losses, loss_)
            if !isnothing(callback)
                callback(run(sess, vars), cnt_, loss_)
            end
          end
          println("iter $iter_, current loss= $loss_")
          return loss_
        end

        # Gradient callback (passed as eval_grad_f below); also handles per-iteration bookkeeping.
        function eval_grad_f(x, g)
          if cnt_>=1
            push!(losses, loss_)
            if !isnothing(callback)
                callback(run(sess, vars), cnt_, loss_)
            end
          end
          cnt_ += 1
          if cnt_>=1
            println("================ ITER $cnt_ ===============")
          end
          g[:]=df(x)
        end
      
        nele_hess = 0 # number of non-zeros in the Hessian (unused with the limited-memory approximation)
        prob = Ipopt.createProblem(n_variables, x_L, x_U, m, g_L, g_U, nz, nele_hess,
                eval_f, (x,g)->(), eval_grad_f, eval_jac_g, nothing)
        addOption(prob, "hessian_approximation", "limited-memory")
        addOption(prob, "max_iter", max_iter)
        addOption(prob, "print_level", verbose) # 0 -- 12; the larger the number, the more detailed the output

        prob.x = x0
        status = Ipopt.solveProblem(prob)
        if status == 0
          printstyled(Ipopt.ApplicationReturnStatus[status],"\n", color=:green)
        else 
          printstyled(Ipopt.ApplicationReturnStatus[status],"\n", color=:red)
        end
        prob.x
    end
    opt = IPOPT(loss; kwargs...)
    minimize(opt, sess)
    return losses
end

The usage is exactly the same as BFGS!; you can simply replace BFGS! with IPOPT!. For example

x = Variable(rand(10))
loss = sum((x-0.6)^2 + (x^2-2x+0.8)^4)
cb = (vs, i, l)->println("$i, $l")
sess = Session(); init(sess)
IPOPT!(sess, loss, vars=[x], callback = cb)

NLOPT

Likewise, the NLOPT! function below is a drop-in substitute for BFGS!.

using ADCME
using NLopt
using PyCall

function NLOPT!(sess::PyObject, loss::PyObject, max_iter::Int64=15000; 
            algorithm::Union{Symbol, Enum} = :LD_LBFGS, vars::Array{PyObject}=PyObject[], 
                    callback::Union{Function, Nothing}=nothing, kwargs...)
    losses = Float64[]
    iter_ = 0 
    NLOPT = CustomOptimizer() do f, df, c, dc, x0, x_L, x_U 
        opt = Opt(algorithm, length(x0))
        opt.upper_bounds = x_U
        opt.lower_bounds = x_L
        opt.maxeval = max_iter
        opt.min_objective = (x,g)->begin
            g[:]= df(x); 
            iter_ += 1
            l = f(x)[1]
            println("================ ITER $iter_ ===============")
            println("current loss= $l")
            push!(losses, l)
            if !isnothing(callback)
                callback(run(sess, vars), iter_, l)
            end
            return l   # reuse the value computed above instead of re-evaluating f
        end
        (minf,minx,ret) = NLopt.optimize(opt, x0)
        minx
    end
    opt = NLOPT(loss; kwargs...)
    minimize(opt, sess)
    return losses
end

For example

x = Variable(rand(10))
loss = sum((x-0.6)^2 + (x^2-2x+0.8)^4)
cb = (vs, i, l)->println("$i, $l")
sess = Session(); init(sess)
NLOPT!(sess, loss, vars=[x], callback = cb, algorithm = :LD_TNEWTON)
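
As in the earlier NLopt example, the optimized values can then be inspected with run:

xmin = run(sess, x)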