# Coefficient field inversion in an elliptic partial differential equation
#
# Consider the following problem:
#
#   min_a J(a) := 1/2 int_Omega (u - ud)^2 dx + gamma/2 int_Omega |grad a|^2 dx,
#
# where u is the solution of
#
#   -div(exp(a) grad u) = f     in Omega,
#                     u = 0     on partial Omega.
#
# Here a is the unknown coefficient field, ud denotes the (possibly noisy) data,
# f in H^{-1}(Omega) is a given forcing term, and gamma >= 0 is the regularization
# parameter (called beta in the code below).

###############################################################################
# 0. Import dolfin and other modules
from dolfin import *

import math
import numpy as np
import time
import logging

import matplotlib.pyplot as plt
import nb

from cgsolver import CGSolver
from cgsolverSteihaug import CGSolverSteihaug

start = time.clock()

logging.getLogger('FFC').setLevel(logging.WARNING)
logging.getLogger('UFL').setLevel(logging.WARNING)
set_log_active(False)

np.random.seed(seed=1)

###############################################################################
# 1. Create the mesh and define the finite element spaces
nx = 80
ny = 80
mesh = UnitSquareMesh(nx, ny)
Va = FunctionSpace(mesh, 'Lagrange', 1)
Vu = FunctionSpace(mesh, 'Lagrange', 2)

###############################################################################
# 2. Define the true parameter and an initial guess for the parameter to be reconstructed
atrue = interpolate(Expression('log(2 + 6*(pow(x[0] - 0.5,2) + pow(x[1] - 0.5,2) > pow(0.2,2)))'), Va)
a = interpolate(Expression("log(8.0)"), Va)

plt.figure(figsize=(15,5))
nb.plot(mesh, subplot_loc=121, mytitle="Mesh", show_axis='on')
nb.plot(atrue, subplot_loc=122, mytitle="True parameter field")

###############################################################################
# 3. Define the finite element functions for the state and adjoint variables
u = Function(Vu)
p = Function(Vu)

###############################################################################
# 4. Define the trial and test functions used by FEniCS
#    to assemble the weak forms of the first and second variations
u_trial, p_trial, a_trial = TrialFunction(Vu), TrialFunction(Vu), TrialFunction(Va)
u_test,  p_test,  a_test  = TestFunction(Vu),  TestFunction(Vu),  TestFunction(Va)

###############################################################################
# 5. Initialize the parameters of the PDE and the boundary conditions
f = Constant("10.0")
u0 = Constant("0.0")

def boundary(x, on_boundary):
    return on_boundary

bc_state = DirichletBC(Vu, u0, boundary)
bc_adj = DirichletBC(Vu, Constant(0.), boundary)

###############################################################################
# 6. Generate the synthetic observations by solving the forward problem.
#    Remember to modify the definition of a_goal when you solve the
#    advection-diffusion problem.
#
#    Set the noise level. When you change the noise level, remember to also
#    change the regularization parameter beta according to the discrepancy
#    criterion (see the helper sketch below).
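# A minimal helper sketch (not part of the original hands-on script) showing one
# informal way to apply the discrepancy criterion: after solving the inverse
# problem for a given beta, compare the data-misfit term of the cost with the
# misfit expected from the noise alone, and adjust beta accordingly. The name
# discrepancy_ratio is ours; `misfit` is the value returned by cost() below,
# and `noise_vec`, `W` are the noise vector and mass matrix assembled later in
# this script.
def discrepancy_ratio(misfit, noise_vec, W):
    # Expected misfit from the noise alone: 0.5 * eta^T W eta.
    # ratio ~  1 : beta is roughly consistent with the noise level
    # ratio << 1 : the reconstruction over-fits the noise  -> increase beta
    # ratio >> 1 : the reconstruction over-smooths the data -> decrease beta
    return misfit / (0.5 * noise_vec.inner(W * noise_vec))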
# Small noise
noise_level = 0.01  # 0.1 #0.01
beta = 5e-8

## Large noise
# noise_level = 0.01
# beta = 5e-8

# Define the weak form of the forward problem used to set up the synthetic observations
a_goal = inner(exp(atrue) * nabla_grad(u_trial), nabla_grad(u_test)) * dx
L_goal = f * u_test * dx

# Solve the forward/state problem to generate the synthetic observations
goal_A, goal_b = assemble_system(a_goal, L_goal, bc_state)
utrue = Function(Vu)
solve(goal_A, utrue.vector(), goal_b)

ud = Function(Vu)
ud.assign(utrue)

# Perturb the state solution to create the synthetic measurements ud:
#   ud = utrue + eta,
# where eta_i ~ N(0, sigma^2) and sigma = noise_level * ||utrue||_inf
MAX = ud.vector().norm("linf")
noise = Vector()
goal_A.init_vector(noise, 1)
noise.set_local(noise_level * MAX * np.random.normal(0, 1, len(ud.vector().array())))
bc_adj.apply(noise)

ud.vector().axpy(1., noise)

print "Using noise level = ", noise_level
print "(Noise variance)/2 = ", math.pow(noise_level*MAX, 2)/2

nb.multi1_plot([utrue, ud], ["State solution with atrue", "Synthetic observations"])

###############################################################################
# 7. Precompute the finite element matrices W and R for the discretization of
#    the cost functional.
#    Define the function cost that computes the total cost, the misfit component,
#    and the regularization component for a given state u and parameter a.
W_equ = inner(u_trial, u_test) * dx
R_equ = Constant(beta) * inner(nabla_grad(a_trial), nabla_grad(a_test)) * dx

W = assemble(W_equ)
R = assemble(R_equ)

def cost(u, ud, a, W, R):
    diff = u.vector() - ud.vector()
    reg = 0.5 * a.vector().inner(R * a.vector())
    misfit = 0.5 * diff.inner(W * diff)
    return [reg + misfit, misfit, reg]

###############################################################################
# 8. Define the linear and bilinear forms for the state/adjoint equations and the
#    incremental forward/adjoint equations.
#    You will need to modify the implementation of a_state and a_adj when you solve
#    the advection-diffusion inversion problem.

# weak form for setting up the state equation
a_state = inner(exp(a) * nabla_grad(u_trial), nabla_grad(u_test)) * dx
L_state = f * u_test * dx

# weak form for setting up the adjoint equation
a_adj = inner(exp(a) * nabla_grad(p_trial), nabla_grad(p_test)) * dx
L_adj = -inner(u - ud, p_test) * dx

# additional weak forms for the incremental forward/adjoint equations
Wua_equ = inner(exp(a) * a_trial * nabla_grad(p_test), nabla_grad(p)) * dx
C_equ   = inner(exp(a) * a_trial * nabla_grad(u), nabla_grad(u_test)) * dx
Raa_equ = inner(exp(a) * a_trial * a_test * nabla_grad(u), nabla_grad(p)) * dx

M_equ = inner(a_trial, a_test) * dx

# assemble the parameter mass matrix M
M = assemble(M_equ)

###############################################################################
# 9. Solve the state equation for the initial guess of the parameter a and evaluate
#    the cost functional
state_A, state_b = assemble_system(a_state, L_state, bc_state)
solve(state_A, u.vector(), state_b)

[cost_old, misfit_old, reg_old] = cost(u, ud, a, W, R)

plt.figure(figsize=(15,5))
nb.plot(a, subplot_loc=121, mytitle="a_ini", vmin=atrue.vector().min(), vmax=atrue.vector().max())
nb.plot(u, subplot_loc=122, mytitle="u(a_ini)")
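# A minimal sketch (not part of the original hands-on script) of an optional sanity
# check before starting the optimization: probe the cost functional with finite
# differences in a random direction ahat, using only forward solves. The quotient
# printed below should settle towards the directional derivative of J at a as eps
# decreases (until round-off dominates), and can later be compared against the
# adjoint-based gradient MG computed in the Newton loop. The name fd_cost_check is
# ours; a serial run is assumed so that local and global vector sizes coincide.
def fd_cost_check(eps_list):
    J0 = cost(u, ud, a, W, R)[0]
    ahat = Function(Va)
    ahat.vector().set_local(np.random.randn(ahat.vector().size()))
    for eps in eps_list:
        a.vector().axpy(eps, ahat.vector())
        # re-solve the forward problem with the perturbed parameter
        A_eps, b_eps = assemble_system(a_state, L_state, bc_state)
        u_eps = Function(Vu)
        solve(A_eps, u_eps.vector(), b_eps)
        J_eps = cost(u_eps, ud, a, W, R)[0]
        print "eps = %8.2e,  (J(a + eps*ahat) - J(a))/eps = %12.6e" % (eps, (J_eps - J0)/eps)
        a.vector().axpy(-eps, ahat.vector())   # restore a before the next perturbation

# Example usage: fd_cost_check([1e-2, 1e-3, 1e-4])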
###############################################################################
# 10. Define the (Gauss-Newton) Hessian apply operator.
#     The method mult(v, Hv) applies the (Gauss-Newton) Hessian to the vector v:
#        Hv = H*v
#     When using the conjugate gradient method, remember that while the Gauss-Newton
#     Hessian is always positive definite, the full Hessian may be indefinite far away
#     from the minimum.
#
#     We also define a preconditioner P for the (Gauss-Newton) Hessian: P = R + beta*M.
class GaussNewtonHessian:
    def __init__(self, R, C, A, adj_A, W):
        self.R = R
        self.C = C
        self.A = A
        self.adj_A = adj_A
        self.W = W

    def init_vector(self, x, dim):
        return self.R.init_vector(x, dim)

    def mult(self, v, Hv):
        # incremental forward solve
        rhs = -(self.C * v)
        bc_adj.apply(rhs)
        solve(self.A, du, rhs)
        # incremental adjoint solve
        rhs = -(self.W * du)
        bc_adj.apply(rhs)
        solve(self.adj_A, dp, rhs)
        # Gauss-Newton Hessian apply: Hv = C^T dp + R v
        self.C.transpmult(dp, Hv)
        Hv.axpy(1., self.R * v)

class Hessian(LinearOperator):
    def __init__(self, R, Raa, C, A, adj_A, W, Wua):
        LinearOperator.__init__(self, a_delta, a_delta)
        self.R = R
        self.Raa = Raa
        self.C = C
        self.A = A
        self.adj_A = adj_A
        self.W = W
        self.Wua = Wua

    def init_vector(self, x, dim):
        return self.R.init_vector(x, dim)

    def mult(self, v, Hv):
        # incremental forward solve
        rhs = -(self.C * v)
        bc_adj.apply(rhs)
        solve(self.A, du, rhs)
        # incremental adjoint solve
        rhs = -(self.W * du) - self.Wua * v
        bc_adj.apply(rhs)
        solve(self.adj_A, dp, rhs)
        # full Hessian apply: Hv = C^T dp + Wua^T du + R v + Raa v
        self.C.transpmult(dp, Hv)
        Wua_du = Vector()
        self.Wua.init_vector(Wua_du, 1)
        self.Wua.transpmult(du, Wua_du)
        Hv.axpy(1., Wua_du)
        Hv.axpy(1., self.R * v)
        Hv.axpy(1., self.Raa * v)

P = R + beta * M
prec = PETScKrylovSolver("cg", "amg")
prec.set_operator(P)
prec.parameters["relative_tolerance"] = 1e-9
prec.parameters["nonzero_initial_guess"] = False

###############################################################################
# 11. Solve the inverse problem using the inexact (Gauss-)Newton CG method

# define parameters for the optimization
tol = 1e-7               # relative tolerance on the gradient norm
c = 1e-5                 # c_armijo: Armijo constant for backtracking
maxiter = 20             # maximum number of (Gauss-)Newton iterations
use_GaussNewton = False  # If True, use the Gauss-Newton Hessian;
                         # if False, use the full Hessian

# initialize the iteration counters
iter = 1
total_cg_iter = 0
converged = False

# initialize the finite element vectors that store the gradient and the Newton direction
g, a_delta = Vector(), Vector()
R.init_vector(a_delta, 0)
R.init_vector(g, 0)

# vectors for the incremental forward/adjoint solutions used in the Hessian apply
du, dp = Vector(), Vector()
W.init_vector(du, 1)
W.init_vector(dp, 0)

a_prev = Function(Va)

print "Nit   CGit   cost          misfit        reg           sqrt(-G*D)    ||grad||       alpha  tolcg"

while iter < maxiter and not converged:

    # assemble matrix C
    C = assemble(C_equ)

    # solve the adjoint problem
    adjoint_A, adjoint_RHS = assemble_system(a_adj, L_adj, bc_adj)
    solve(adjoint_A, p.vector(), adjoint_RHS)

    # assemble W_ua and R_aa
    Wua = assemble(Wua_equ)
    Raa = assemble(Raa_equ)

    # evaluate the gradient
    CT_p = Vector()
    C.init_vector(CT_p, 1)
    C.transpmult(p.vector(), CT_p)
    MG = CT_p + R * a.vector()
    solve(M, g, MG)

    # calculate the norm of the gradient
    grad2 = g.inner(MG)
    gradnorm = sqrt(grad2)

    # set the CG tolerance (use the Eisenstat-Walker termination criterion)
    if iter == 1:
        gradnorm_ini = gradnorm
    tolcg = min(0.5, sqrt(gradnorm/gradnorm_ini))

    # define the Hessian apply operator (with preconditioner)
    if use_GaussNewton:
        Hess_Apply = GaussNewtonHessian(R, C, state_A, adjoint_A, W)
        solver = CGSolver()
    else:
        Hess_Apply = Hessian(R, Raa, C, state_A, adjoint_A, W, Wua)
        solver = CGSolverSteihaug()

    solver.set_operator(Hess_Apply)
    solver.set_preconditioner(prec)
    # set the tolerance for CG as determined by the Eisenstat-Walker condition
    solver.parameters["rel_tolerance"] = tolcg

    # solve the Newton system H a_delta = -MG
    solver.solve(a_delta, -MG)
    total_cg_iter += solver.iter

    # linesearch
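    # Armijo backtracking: try the full Newton step (alpha = 1) first and accept it if
    # J(a + alpha*a_delta) < J(a) + c * alpha * (MG, a_delta); otherwise halve alpha and
    # retry, giving up after 10 reductions. Since a_delta is a descent direction,
    # MG.inner(a_delta) < 0 and the accepted cost strictly decreases.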
    alpha = 1
    descent = False
    no_backtrack = 0
    a_prev.assign(a)
    while not descent and no_backtrack < 10:
        a.vector().axpy(alpha, a_delta)

        # solve the state/forward problem
        state_A, state_b = assemble_system(a_state, L_state, bc_state)
        solve(state_A, u.vector(), state_b)

        # evaluate the cost
        [cost_new, misfit_new, reg_new] = cost(u, ud, a, W, R)

        # check if the Armijo condition is satisfied
        if cost_new < cost_old + alpha * c * MG.inner(a_delta):
            cost_old = cost_new
            descent = True
        else:
            no_backtrack += 1
            alpha *= 0.5
            a.assign(a_prev)  # reset a

    if not descent:
        raise NameError("Backtracking failed")

    # calculate sqrt(-G * D)
    graddir = sqrt(-MG.inner(a_delta))

    sp = ""
    print "%2d %2s %2d %3s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %8.5e %1s %5.2f %1s %5.3e" % \
        (iter, sp, solver.iter, sp, cost_new, sp, misfit_new, sp, reg_new, sp, \
         graddir, sp, gradnorm, sp, alpha, sp, tolcg)

    # check for convergence
    if gradnorm < tol*gradnorm_ini and iter > 1:
        converged = True
        if use_GaussNewton:
            print "Gauss-Newton method converged in ", iter, " iterations"
        else:
            print "Newton's method converged in ", iter, " iterations"
        print "Total number of CG iterations: ", total_cg_iter

    iter += 1

if not converged:
    print "Newton's method did not converge in ", maxiter, " iterations"

print "Time elapsed: ", time.clock() - start

nb.multi1_plot([atrue, a], ["atrue", "a"])
nb.multi1_plot([u, p], ["u", "p"], same_colorbar=False)
plt.show()
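# A minimal post-processing sketch (not part of the original script): report the
# relative L2 error of the reconstructed parameter with respect to the true field,
# using dolfin's errornorm and norm utilities.
rel_err = errornorm(atrue, a, norm_type="L2") / norm(atrue, norm_type="L2")
print "Relative L2 error of the reconstructed parameter: ", rel_err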