This forum has been archived. All content is frozen. Please use KDE Discuss instead.

Performance issue.

Tags: None
(comma "," separated)
sth4nth
Registered Member
Posts
13
Karma
0

Performance issue.

Mon Mar 12, 2012 1:32 am
I did a comparison between eigen and armadillo. Here is the code
Code: Select all
#include <Eigen/Dense>
#include <armadillo>
using namespace Eigen;
using namespace arma;
using namespace std;

mat squareDistance(const mat& X, const mat& Y)
{
   return repmat(sum(X % X,0).t(),1,Y.n_cols)+repmat(sum(Y % Y,0), X.n_cols,1)-2*(X.t()*Y);
}

MatrixXd squareDistance(const MatrixXd& X, const MatrixXd& Y)
{
   return X.cwiseProduct(X).colwise().sum().transpose().replicate(1,Y.cols())
      + Y.cwiseProduct(Y).colwise().sum().replicate(X.cols(),1)
      - 2*(X.transpose()*Y);
}

mat squareDistanceBrute(const mat& X, const mat& Y)
{
   mat Z(X.n_cols,Y.n_cols);
   for(size_t i = 0; i < X.n_cols; ++i)
   {
      for(size_t j = 0; j < Y.n_cols; ++j)
      {
         auto v = X.col(i)-Y.col(j);
         Z(i,j) = dot(v,v);
          //Z(i,j) = sum(square(X.col(i)-Y.col(j))); //does not work
      }
   }
   return Z;
}

MatrixXd squareDistanceBrute(const MatrixXd& X, const MatrixXd& Y)
{
   MatrixXd Z(X.cols(),Y.cols());
   for(int i = 0; i < X.cols(); ++i)
      for(int j = 0; j < Y.cols(); ++j)
         Z(i,j) = (X.col(i)-Y.col(j)).squaredNorm();
   return Z;
}

int _tmain(int argc, _TCHAR* argv[])
{
   cout<<"Square Euclidean Distance Computation Benchmark."<<endl;
   wall_clock timer;
   int d = 100;
   int n = 1000;
   {
      mat X(d,n); X.randu();
      mat Y(d,n); Y.randu();

      timer.tic();
      mat Z1 = squareDistance(X,Y);
      cout<<"Armadillo Matrix: "<<timer.toc()<<endl;
      timer.tic();
      mat Z2 = squareDistanceBrute(X,Y);
      cout<<"Armadillo Brute Force: "<<timer.toc()<<endl;

      cout<<max(max(abs(Z1-Z2)))<<endl;
   }

   {
      MatrixXd X = MatrixXd::Random(d,n);
      MatrixXd Y = MatrixXd::Random(d,n);

      timer.tic();
      MatrixXd Z1= squareDistance(X,Y);
      cout<<"Eigen Matrix: "<<timer.toc()<<endl;

      timer.tic();
      MatrixXd Z2 = squareDistanceBrute(X,Y);
      cout<<"Eigen Brute Force: "<<timer.toc()<<endl;

      cout << (Z1-Z2).array().abs().maxCoeff() << endl;
   }

   return 0;
}

Here is the output in VC 11:
Square Euclidean Distance Computation Benchmark.
Armadillo Matrix: 0.18
Armadillo Brute Force: 2.413
6.03961e-014
Eigen Matrix: 0.469
Eigen Brute Force: 0.291
8.52651e-014

In VC2010, the result is similar.
The brute force method of Eigen is fast, however, directly computation by manipulate matrix using Eigen is slow (2~3 times compare to armadillo).

What is the problem, or what is the right way to do.
Hauke
Registered Member
Posts
109
Karma
3
OS

Re: Performance issue.

Mon Mar 12, 2012 7:51 am
Hi sth4nth,

are you compiling in 32bit or 64bit mode? If you are compiling in 32bit mode, make sure you enabled SSE.

If you have already enabled SSE or you are in 64bit mode, the problem could reside in the replicate statement. We may have a performance issue there regarding how statements are nested and evaluated. I would have to double check the problem if none of the above issue improve the runtime.

- Hauke
User avatar
ggael
Moderator
Posts
3447
Karma
19
OS

Re: Performance issue.

Tue Mar 13, 2012 8:44 am
thanks for this report, there is indeed an issue in Replicate which does not properly use our nesting rules. In clear, the nested expression is not evaluated into a temporary, and so it is evaluated multiple times. This is easy to fix, in the meantime, the workaround is to use .eval(), e.g.:

Code: Select all
return X.colwise().squaredNorm().eval().transpose().replicate(1,Y.cols())
      + Y.colwise().squaredNorm().eval().replicate(X.cols(),1)
      - 2*(X.transpose()*Y);


-> much faster!
sth4nth
Registered Member
Posts
13
Karma
0

Re: Performance issue.

Wed Mar 14, 2012 1:24 pm
Thanks, ggael. Indeed, much much faster (5x faster than before, 2x faster than armadillo). Hope you fix the problem soon.
sth4nth
Registered Member
Posts
13
Karma
0

Re: Performance issue.

Wed Mar 14, 2012 2:01 pm
Eigen in VC is very fast, the matrix version of my test case runs about 0.8s. However, I tested again in Mac, under Apple llvm 3.1, it runs slow. Here are results

Armadillo Matrix: 0.548987
Armadillo Brute Force: 4.88918
7.81597e-14
Eigen Matrix: 1.75017
Eigen Brute Force: 6.24062

Here is the code
Code: Select all

#include <iostream>
#include <Eigen/Dense>
#include <armadillo>



using namespace Eigen;
using namespace arma;
using namespace std;

mat squareDistance(const mat& X, const mat& Y)
{
   return repmat(sum(X % X,0).t(),1,Y.n_cols)+repmat(sum(Y % Y,0), X.n_cols,1)-2*(X.t()*Y);
}

MatrixXd squareDistance(const MatrixXd& X, const MatrixXd& Y)
{
   return X.colwise().squaredNorm().eval().transpose().replicate(1,Y.cols())
    + Y.colwise().squaredNorm().eval().replicate(X.cols(),1)
    - 2*(X.transpose()*Y);
}

mat squareDistanceBrute(const mat& X, const mat& Y)
{
   mat Z(X.n_cols,Y.n_cols);
   for(size_t i = 0; i < X.n_cols; ++i)
      for(size_t j = 0; j < Y.n_cols; ++j)
         Z(i,j) = accu(square(X.col(i)-Y.col(j)));
   return Z;
}

MatrixXd squareDistanceBrute(const MatrixXd& X, const MatrixXd& Y)
{
   MatrixXd Z(X.cols(),Y.cols());
   for(int i = 0; i < X.cols(); ++i)
      for(int j = 0; j < Y.cols(); ++j)
         Z(i,j) = (X.col(i)-Y.col(j)).squaredNorm();
   return Z;
}

int main(int argc, const char * argv[])
{
   
   cout << arma_version::as_string() << endl;
   cout<<"Square Euclidean Distance Computation Benchmark."<<endl;
   wall_clock timer;
   int d = 100;
   int n = 1000;
   {
      mat X(d,n); X.randu();
      mat Y(d,n); Y.randu();
       
      timer.tic();
      mat Z1 = squareDistance(X,Y);
      cout<<"Armadillo Matrix: "<<timer.toc()<<endl;
      timer.tic();
      mat Z2 = squareDistanceBrute(X,Y);
      cout<<"Armadillo Brute Force: "<<timer.toc()<<endl;
       
      cout<<max(max(abs(Z1-Z2)))<<endl;
   }
   
   {
      MatrixXd X = MatrixXd::Random(d,n);
      MatrixXd Y = MatrixXd::Random(d,n);
       
      timer.tic();
      MatrixXd Z1= squareDistance(X,Y);
      cout<<"Eigen Matrix: "<<timer.toc()<<endl;
       
      timer.tic();
      MatrixXd Z2 = squareDistanceBrute(X,Y);
      cout<<"Eigen Brute Force: "<<timer.toc()<<endl;
       
      cout << (Z1-Z2).array().abs().maxCoeff() << endl;
   }
   
   return 0;
}

User avatar
ggael
Moderator
Posts
3447
Karma
19
OS

Re: Performance issue.

Wed Mar 14, 2012 4:26 pm
I cannot reproduce with the default mac compiler (llvm-gcc)
can you be more specific about the version of your compiler, llvm+gcc ? llvm+clang ? the macport version ? anyway make sure you compiled with the optimization enabled (-O2 -DNDEBUG)
sth4nth
Registered Member
Posts
13
Karma
0

Re: Performance issue.

Wed Mar 14, 2012 4:46 pm
Ignore my previous post. I was stupidly running in debug mode. Here is the result for release mode in xcode 4.3.1
Armadillo Matrix: 0.252057
Armadillo Brute Force: 0.245539
7.81597e-14
Eigen Matrix: 0.095675
Eigen Brute Force: 0.160599

Eigen performs extremelly well.


Bookmarks



Who is online

Registered users: Bing [Bot], claydoh, Google [Bot], rblackwell, Yahoo [Bot]