
Automatic differentiation

zoharl (Registered Member)

Automatic differentiation

Sat Jun 01, 2013 5:37 am
Hi,

My objective function is written with Eigen. I haven't tried automatic differentiation in C++ yet (in Matlab it was almost as slow as finite differences). I was wondering what's new on the subject (there seems to be an undocumented related class in the unsupported modules), and whether there are any recommendations (a good AD library that works well with Eigen)?

Zohar
Zohar
ggael (Moderator)

Re: Automatic differentiation

Sat Jun 01, 2013 8:15 am
The one in unsupported/Eigen/AutoDiff works well. Here is a self-contained example combining it with the Levenberg-Marquardt solver:
Code:
#include <Eigen/Dense>
#include <unsupported/Eigen/NonLinearOptimization>
#include <unsupported/Eigen/AutoDiff>
#include <iostream>

// Computes the energy terms into costs: the first entries contain the angle variation terms,
// the remaining ones the edge length variation terms.
template<typename Scalar>
void ikLikeCosts(const Eigen::Matrix<Scalar,Eigen::Dynamic,1>& curve, const Eigen::VectorXd& targetAngles, const Eigen::VectorXd& targetLengths, double beta,
                 Eigen::Matrix<Scalar,Eigen::Dynamic,1>& costs)
{
    using namespace Eigen;
    using std::atan2;

    typedef Matrix<Scalar,2,1> Vec2;
    int nb = curve.size()/2;

    costs.setZero();
    for(int k=1;k<nb-1;++k)
    {
        Vec2 pk0 = curve.template segment<2>(2*(k-1));
        Vec2 pk  = curve.template segment<2>(2*k);
        Vec2 pk1 = curve.template segment<2>(2*(k+1));

        if(k+1<nb-1) {
            costs((nb-2)+(k-1)) = (pk1-pk).norm() - targetLengths(k-1);
        }
       

        Vec2 v0 = (pk-pk0).normalized();
        Vec2 v1 = (pk1-pk).normalized();

        costs(k-1) = beta * (atan2(-v0.y() * v1.x() + v0.x() * v1.y(), v0.x() * v1.x() + v0.y() * v1.y()) - targetAngles(k-1));
    }
}

// Generic functor
template<typename _Scalar>
struct Functor
{
    typedef _Scalar Scalar;
    enum {
        InputsAtCompileTime = Eigen::Dynamic,
        ValuesAtCompileTime = Eigen::Dynamic
                          };
    typedef Eigen::Matrix<Scalar,InputsAtCompileTime,1> InputType;
    typedef Eigen::Matrix<Scalar,ValuesAtCompileTime,1> ValueType;
    typedef Eigen::Matrix<Scalar,ValuesAtCompileTime,InputsAtCompileTime> JacobianType;

    const int m_inputs, m_values;

    Functor() : m_inputs(InputsAtCompileTime), m_values(ValuesAtCompileTime) {}
    Functor(int inputs, int values) : m_inputs(inputs), m_values(values) {}

    int inputs() const { return m_inputs; } // number of degrees of freedom (= 2*nb_vertices)
    int values() const { return m_values; } // number of energy terms (= nb_vertices + nb_edges)

    // to be defined in the subclass:
    //    void operator() (const InputType& x, ValueType* v, JacobianType* _j=0) const;
};

// Specialized functor wrapping the ikLikeCosts function
struct iklike_functor : Functor<double>
{
    typedef Eigen::AutoDiffScalar<Eigen::Matrix<Scalar,Eigen::Dynamic,1> > ADS;
    typedef Eigen::Matrix<ADS, Eigen::Dynamic, 1> VectorXad;

    // pfirst and plast are the two extremities of the curve
    iklike_functor(const Eigen::VectorXd& targetAngles, const Eigen::VectorXd& targetLengths, double beta, const Eigen::Vector2d pfirst, const Eigen::Vector2d plast)
        :   Functor<double>(targetAngles.size()*2-4,targetAngles.size()*2-1),
            m_targetAngles(targetAngles), m_targetLengths(targetLengths), m_beta(beta),
            m_pfirst(pfirst), m_plast(plast)
    {}

    // input = x = {  ..., x_i, y_i, ....}
    // output = fvec = the value of each term
    int operator()(const Eigen::VectorXd &x, Eigen::VectorXd &fvec)
    {
        using namespace Eigen;
        VectorXd curves(this->inputs()+8);

        curves.segment(4,this->inputs()) = x;

        Vector2d d(1,0);
        curves.segment<2>(0)                   = m_pfirst - d;
        curves.segment<2>(2)                   = m_pfirst;
        curves.segment<2>(this->inputs()+4)    = m_plast;
        curves.segment<2>(this->inputs()+6)    = m_plast + d;

        ikLikeCosts(curves, m_targetAngles, m_targetLengths, m_beta, fvec);
        return 0;
    }

    // Compute the jacobian into fjac for the current solution x
    int df(const Eigen::VectorXd &x, Eigen::MatrixXd &fjac)
    {
        using namespace Eigen;
        VectorXad curves(this->inputs()+8);

        // Compute the derivatives of each degree of freedom
        // -> grad( x_i ) = (0, ..., 0, 1, 0, ..., 0) ; 1 is in position i
        for(int i=0; i<this->inputs();++i)
            curves(4+i) = ADS(x(i), this->inputs(), i);

        Vector2d d(1,0);
        curves.segment<2>(0)                   = (m_pfirst - d).cast<ADS>();
        curves.segment<2>(2)                   = (m_pfirst).cast<ADS>();
        curves.segment<2>(this->inputs()+4)    = (m_plast).cast<ADS>();
        curves.segment<2>(this->inputs()+6)    = (m_plast + d).cast<ADS>();

        VectorXad v(this->values());

        ikLikeCosts(curves, m_targetAngles, m_targetLengths, m_beta, v);

        // copy the gradient of each energy term into the Jacobian
        for(int i=0; i<this->values();++i)
            fjac.row(i) = v(i).derivatives();

        return 0;
    }

    const Eigen::VectorXd& m_targetAngles;
    const Eigen::VectorXd& m_targetLengths;
    double m_beta;
    Eigen::Vector2d m_pfirst, m_plast;
};



void draw_vecX(const Eigen::VectorXd& res)
{
  for( int i = 0; i < (res.size()/2) ; i++ )
  {
    std::cout << res.segment<2>(2*i).transpose() << "\n";
  }
}


using namespace Eigen;

int main()
{
    Eigen::Vector2d pfirst(-5., 0.);
    Eigen::Vector2d plast ( 5., 0.);

    // rest pose is a straight line starting between first and last point
    const int nb_points = 30;
    Eigen::VectorXd targetAngles (nb_points);
    targetAngles.fill(0);

    Eigen::VectorXd targetLengths(nb_points-1);
    double val = (pfirst-plast).norm() / (double)(nb_points-1);
    targetLengths.fill(val);


    // get initial solution
    Eigen::VectorXd x((nb_points-2)*2);
    for(int i = 1; i < (nb_points - 1); i++)
    {
        double s = (double)i / (double)(nb_points-1);
        x.segment<2>((i-1)*2) = plast * s + pfirst * (1. - s);
    }

    // move last point
    plast = Eigen::Vector2d(4., 1.);

    // Create the functor object
    iklike_functor func(targetAngles, targetLengths, 0.1, pfirst, plast);

    // construct the solver
    Eigen::LevenbergMarquardt<iklike_functor> lm(func);

    // adjust tolerance
    lm.parameters.ftol *= 1e-2;
    lm.parameters.xtol *= 1e-2;
    lm.parameters.maxfev = 2000;


    int a = lm.minimize(x);
    std::cerr << "info = " << a  << " " << lm.nfev << " " << lm.njev << " "  <<  "\n";

    std::cout << pfirst.transpose() << "\n";
    draw_vecX( x);
    std::cout << plast.transpose() << "\n";
}
zoharl (Registered Member)

Re: Automatic differentiation

Sat Jun 01, 2013 11:37 am
Great, thanks; let me try it.
zoharl (Registered Member)

Re: Automatic differentiation

Sat Jun 01, 2013 12:22 pm
Just to have better context, what exactly is the problem that you are solving?

Does it support Hessians for Newton's method?

Do you have a rough estimate of the AD performance vs. a (manual) analytical solution vs. finite differences?

I can follow the code, but is there any chance of a simpler cost function example, such as f(x) = x*x, or f(x) = x'*A*x (a quadratic form for some constant matrix A, where x' is the transpose of x)?
ggael (Moderator)

Re: Automatic differentiation

Sat Jun 01, 2013 9:57 pm
Imagine a bar modeled as a polyline with some stretching and bending forces, here modeled as penalties on the segment length differences and angle differences. One extremity is fixed, and the other one is moved. (This is not a physical simulation: there is no mass, no gravity, no time, etc.)

You can nest AD to compute first- and second-order derivatives, and thus get the Hessian. From my experience, auto-diff was always faster than finite differences, but I guess it might depend on the problem. Manual differentiation is significantly faster because you can take advantage of higher-level differentiation rules, remarkable identities, factorizations, etc.
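
For the simpler quadratic cost f(x) = x'*A*x asked about above, here is a minimal sketch using the same AutoDiffScalar seeding as in the example (the size n and the matrix A are arbitrary placeholders):

Code:
#include <Eigen/Dense>
#include <unsupported/Eigen/AutoDiff>
#include <iostream>

int main()
{
    using namespace Eigen;
    typedef AutoDiffScalar<VectorXd> ADS;

    const int n = 3;
    MatrixXd A  = MatrixXd::Random(n, n);
    VectorXd x0 = VectorXd::Random(n);

    // seed each variable: x(i) carries the value x0(i) and the unit derivative e_i
    Matrix<ADS, Dynamic, 1> x(n);
    for (int i = 0; i < n; ++i)
        x(i) = ADS(x0(i), n, i);

    // f(x) = x' * A * x, written with scalar ADS operations only
    ADS f(0., VectorXd::Zero(n));
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j)
        {
            ADS term = x(i) * A(i, j) * x(j); // one term of the quadratic form
            f += term;
        }

    std::cout << "value    : " << f.value() << "\n";
    std::cout << "gradient : " << f.derivatives().transpose() << "\n";
    // the analytic gradient of x'*A*x is (A + A')*x, for comparison
    std::cout << "expected : " << ((A + A.transpose()) * x0).transpose() << "\n";
}

For the Hessian, the same idea is nested: the entries of the derivative vector are themselves AutoDiffScalar values.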
zoharl (Registered Member)

Re: Automatic differentiation

Tue Jun 11, 2013 12:43 pm
I forgot to ask: Does it support reverse mode? If not, any recommendations on something that does?
ggael (Moderator)

Re: Automatic differentiation

Tue Jun 11, 2013 7:22 pm
No reverse mode. If I remember correctly, Adol-C (http://www.coin-or.org/projects/ADOL-C.xml) has a reverse mode. There is a support file in unsupported/Eigen/AdolcSupport for the forward mode. I guess it can easily be adapted to work with the tape-based part of the library that allows reverse mode.
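
For reference, a rough sketch of Adol-C's tape-based reverse mode (using its trace_on/trace_off taping and the gradient driver, independent of the Eigen support file; the function f(x) = sum_i x_i^2 and the sizes are arbitrary) might look like:

Code:
#include <adolc/adolc.h>
#include <iostream>

int main()
{
    const short tag = 1;
    const int n = 5;
    double x[n], g[n], f;
    for (int i = 0; i < n; ++i) x[i] = 0.1 * (i + 1);

    // record the function f(x) = sum_i x_i^2 on a tape
    trace_on(tag);
    adouble ax[n];
    adouble af = 0.0;
    for (int i = 0; i < n; ++i)
    {
        ax[i] <<= x[i];   // mark x[i] as an independent variable
        af += ax[i] * ax[i];
    }
    af >>= f;             // mark f as the dependent variable
    trace_off();

    // the gradient driver replays the tape (reverse sweep) to get df/dx
    gradient(tag, n, x, g);
    for (int i = 0; i < n; ++i)
        std::cout << "g[" << i << "] = " << g[i] << "\n";
}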
zoharl (Registered Member)

Re: Automatic differentiation

Wed Jun 12, 2013 8:42 am
Thanks, I'll check it out.
zoharl (Registered Member)

Re: Automatic differentiation

Thu Jun 13, 2013 7:38 am
Another question...
Is there support for the sparsity pattern of the Jacobian?
ggael (Moderator)

Re: Automatic differentiation

Thu Jun 13, 2013 4:01 pm
I have no idea about Adol-C. Regarding our AutoDiff, I remember it working with sparse vectors for the derivatives, thus producing a sparse Jacobian.
zoharl (Registered Member)

Re: Automatic differentiation

Thu Jun 13, 2013 5:19 pm
No, I didn't mean that it supports sparse matrices; I think that is a given. I meant that it exploits the problem's sparsity and avoids computation (seeding) of terms w.r.t. some variables. Consider for example the minimization of the Dirichlet energy on a regular 2D grid (similar to your previous problem, but without the angles):

\sum_{i=1}^{n} \sum_{j \in N(i)} \|v_j - v_i\|^2

where |N(i)| = 4 is the number of neighbors of vertex i. The pseudo (Matlab) code of the objective function is:

Code:
% init
n = 500;
v = rand(n, 1);
ne = zeros(n, 4);

for i=1:4
  ne(:,i) = circshift((1:n)', -i.^2);
end


Code:
function E = f(v, ne)
  n = length(v);
  E = 0;
  for i = 1:n
    for j = 1:4
      E = E + norm(v(i)-v(ne(i,j))).^2;
    end
  end
end


Differentiating this directly takes O(n^2) (every operation carries a dense gradient of length n), while:

Code:
% g:R^n->R^n
function t = g(v, ne)
  n = length(v);
  t = zeros(length(v), 4);
  for i = 1:n
    for j = 1:4
      t(i,j) = norm(v(i)-v(ne(i,j))).^2;
    end
  end
end


Code:
function E = h(t)
  E = sum(t(:));
end


differentiating [t, Jg] = g(v) with a sparse Jacobian pattern (each of the n terms in t is seeded only for its 4 variables instead of all n), then feeding t to [E, Jh] = h(t), and finally chaining:

grad = Jh * Jg;

takes O(n).
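
In Eigen terms, a hypothetical sketch of this local seeding (each term carries only a 2-entry derivative vector for the two variables it actually touches; the neighbor rule just mimics the circshift above) could assemble the sparse Jacobian of g directly:

Code:
#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <unsupported/Eigen/AutoDiff>
#include <vector>
#include <iostream>

int main()
{
    using namespace Eigen;
    typedef AutoDiffScalar<Vector2d> ADS; // only 2 active variables per term

    const int n = 500;
    VectorXd v = VectorXd::Random(n);

    std::vector<Triplet<double> > trip;
    int row = 0;
    for (int i = 0; i < n; ++i)
        for (int k = 1; k <= 4; ++k)
        {
            int j = (i + k * k) % n;  // neighbor index, as in the circshift above
            ADS vi(v(i), 2, 0);       // local direction 0 = d/dv_i
            ADS vj(v(j), 2, 1);       // local direction 1 = d/dv_j
            ADS d = vi - vj;
            ADS t = d * d;            // one energy term (v_i - v_j)^2

            // scatter the 2-entry local gradient into the global sparse Jacobian
            trip.push_back(Triplet<double>(row, i, t.derivatives()(0)));
            trip.push_back(Triplet<double>(row, j, t.derivatives()(1)));
            ++row;
        }

    SparseMatrix<double> Jg(row, n);
    Jg.setFromTriplets(trip.begin(), trip.end());
    std::cout << "Jg: " << Jg.rows() << "x" << Jg.cols()
              << " with " << Jg.nonZeros() << " non-zeros\n";
}

Both the seeding and the assembly are O(n), matching the count above.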
zoharl (Registered Member)

Re: Automatic differentiation

Fri Jun 14, 2013 7:44 pm
Here, maybe example 4.1.1 on page 25 would be clearer:

http://www.google.co.il/url?sa=t&rct=j& ... RHWb5HcyDg

