首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >cholesky分解ScaLapack误差

cholesky分解ScaLapack误差
EN

Stack Overflow用户
提问于 2013-01-03 21:39:12
回答 1查看 2.2K关注 0票数 15

我得到了下面的错误,我不知道为什么。

代码语言:javascript
复制
{    1,    1}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    1,    0}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    0,    1}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    0,    0}:  On entry to PDPOTRF parameter number    2 had an illegal value


 info < 0:  If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i. 

我知道错误信息意味着什么,但我尽可能地遵循web上可用的日期文档,并试图从web上的工作示例代码中拼凑出一个并行的cholesky分解。我不知道我哪里出了问题。

有人能解释一下我在下面的代码中哪里出错了吗?下面是代码的概述,我正在用4个处理器进行测试,并将8x8矩阵划分为2x2处理器块网格,从文件中加载一个矩阵,下面是一个示例8x8矩阵文件,

代码语言:javascript
复制
182   147   140   125   132    76   126   157
147   213   185   150   209   114   166   188
140   185   232   129   194   142   199   205
125   150   129   143   148    81   104   150
132   209   194   148   214   122   172   189
76   114   142    81   122   102   129   117
126   166   199   104   172   129   187   181
157   188   205   150   189   117   181   259

我遵循示例将矩阵分发给4个独立的4x4本地数组,每个节点上都有一个。然后运行descinit_并调用相关的pdpotrf_例程,这将产生上述错误。我不知道我哪里出了问题,并试图尽我所能地跟踪文档。fortran中并行cholesky分解的一个工作示例也将非常有用。

函数调用的引用()

_

_

运行参数

代码语言:javascript
复制
code name - Meaning = Value
N - Global Rows = 8
M - Global Cols = 8
Nb - Local Block Rows = 2
Mb - Local Block Cols = 2
nrows - Local Rows = 4
ncols Local Cols= 4
lda - Leading dimension of local array = 4 (i've tried 2,4,8)
ord - Order of Matrix = 4   (i've also tried many different things here as well)

我在每个节点上打印了上述参数,它们是相同的。

代码语言:javascript
复制
#include <mpi.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <sstream>
using namespace std;


/*
  To compile:
  mpic++ test.cpp -o test -L/home/admin/libs -lscalapack -lrefblas -ltmg -lreflapack -lgfortran -Wall -O2
  To run:
  mpirun -np 4 ./test matrixfile 8 8 2 2

*/

extern "C" {
  /* Cblacs declarations */
  void Cblacs_pinfo(int*, int*);
  void Cblacs_get(int, int, int*);
  void Cblacs_gridinit(int*, const char*, int, int);
  void Cblacs_gridinfo(int, int*, int*, int*,int*);
  void Cblacs_pcoord(int, int, int*, int*);
  void Cblacs_gridexit(int);
  void Cblacs_barrier(int, const char*);
  void Cdgerv2d(int, int, int, double*, int, int, int);
  void Cdgesd2d(int, int, int, double*, int, int, int);

  int numroc_(int*, int*, int*, int*, int*);

  void pdpotrf_(char*, int*, double*,
        int*, int*, int*, int*);

  void descinit_( int *, int *, int *, int *, int *, int *, int *,
          int *, int *, int *);

}

int main(int argc, char **argv){
  /* MPI */
  int mpirank,nprocs;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  double MPIelapsed;
  double MPIt2;
  double MPIt1;

  /* Helping vars */
  int iZERO = 0;
  int verbose = 1;
  bool mpiroot = (mpirank == 0);

  if (argc < 6) {
    if (mpiroot)
      cerr << "Usage: matrixTest matrixfile N M Nb Mb"
       << endl
       << " N = Rows , M = Cols , Nb = Row Blocks , Mb = Col Blocks "
       << endl;

    MPI_Finalize();
    return 1;
  }
  /* Scalapack / Blacs Vars */
  int N, M, Nb, Mb;
  int descA[9];
  int info = 0;
  //  int mla = 4;
  int ord = 8;



  double *A_glob = NULL, *A_glob2 = NULL, *A_loc = NULL;

  /* Parse command line arguments */
  if (mpiroot) {
    /* Read command line arguments */
    stringstream stream;
    stream << argv[2] << " " << argv[3] << " " << argv[4] << " " << argv[5];
    stream >> N >> M >> Nb >> Mb;


    /* Reserve space and read matrix (with transposition!) */
    A_glob  = new double[N*M];
    A_glob2 = new double[N*M];
    string fname(argv[1]);
    ifstream file(fname.c_str());
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
    file >> *(A_glob + N*c + r);
      }
    }

    /* Print matrix */

      if(verbose == 1) {
    cout << "Matrix A:\n";
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
        cout << setw(3) << *(A_glob + N*c + r) << " ";
      }
      cout << "\n";
    }
    cout << endl;
      }


  }

  /* Begin Cblas context */
  int ctxt, myid, myrow, mycol, numproc;
  //<TODO> make dynamic
  int procrows = 2, proccols = 2;
  Cblacs_pinfo(&myid, &numproc);
  Cblacs_get(0, 0, &ctxt);
  Cblacs_gridinit(&ctxt, "Row-major", procrows, proccols);
  Cblacs_gridinfo( ctxt, &procrows, &proccols, &myrow, &mycol );
  /* process coordinates for the process grid */
  // Cblacs_pcoord(ctxt, myid, &myrow, &mycol);


  /* Broadcast of the matrix dimensions */
  int dimensions[4];
  if (mpiroot) {
    dimensions[0] = N;//Global Rows
    dimensions[1] = M;//Global Cols
    dimensions[2] = Nb;//Local Rows
    dimensions[3] = Mb;//Local Cols
  }
  MPI_Bcast(dimensions, 4, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&ord, 1, MPI_INT, 0, MPI_COMM_WORLD);
  N = dimensions[0];
  M = dimensions[1];
  Nb = dimensions[2];
  Mb = dimensions[3];

  int nrows = numroc_(&N, &Nb, &myrow, &iZERO, &procrows);
  int ncols = numroc_(&M, &Mb, &mycol, &iZERO, &proccols);

  int lda = max(1,nrows);

  MPI_Bcast(&lda, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* Print grid pattern */
  if (myid == 0)
    cout << "Processes grid pattern:" << endl;
  for (int r = 0; r < procrows; ++r) {
    for (int c = 0; c < proccols; ++c) {
      Cblacs_barrier(ctxt, "All");
      if (myrow == r && mycol == c) {
    cout << myid << " " << flush;
      }
    }
    Cblacs_barrier(ctxt, "All");
    if (myid == 0)
      cout << endl;
  }

  if(myid == 0){
    cout <<"Run Parameters"<<endl;
    cout <<"Global Rows = " << M <<endl;
    cout <<"Global Cols = " << N <<endl;
    cout <<"Local Block Rows = " << Mb <<endl;
    cout <<"Local Block Cols = " << Nb <<endl;
    cout << "nrows = "<<nrows<<endl;
    cout << "ncols = "<<ncols<<endl;
    cout << "lda = "<<lda<<endl;
    cout <<"Order = "<<ord<<endl;
  }


  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }
  A_loc = new double[nrows*ncols];
  for (int i = 0; i < nrows*ncols; ++i) *(A_loc+i)=0.;

  /* Scatter matrix */
  int sendr = 0, sendc = 0, recvr = 0, recvc = 0;
  for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
    sendc = 0;
    int nr = Nb;
    if (N-r < Nb)
      nr = N-r;

    for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
      int nc = Mb;
      if (M-c < Mb)
    nc = M-c;

      if (mpiroot) {
    Cdgesd2d(ctxt, nr, nc, A_glob+N*c+r, N, sendr, sendc);
      }

      if (myrow == sendr && mycol == sendc) {
    Cdgerv2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
    recvc = (recvc+nc)%ncols;
      }

    }

    if (myrow == sendr)
      recvr = (recvr+nr)%nrows;
  }

  /* Print local matrices */
  if(verbose == 1) {
  for (int id = 0; id < numproc; ++id) {
    if (id == myid) {
    cout << "A_loc on node " << myid << endl;
    for (int r = 0; r < nrows; ++r) {
      for (int c = 0; c < ncols; ++c)
        cout << setw(3) << *(A_loc+nrows*c+r) << " ";
      cout << endl;
    }
    cout << endl;
      }
      Cblacs_barrier(ctxt, "All");
    }
  }

  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }

  /* DescInit */
  info=0;
  descinit_(descA, &N, &M, &Nb, &Mb,&iZERO,&iZERO,&ctxt, &lda, &info);

  if(mpiroot){
    if(verbose == 1){
      if (info == 0){
    cout<<"Description init sucesss!"<<endl;
      }
      if(info < 0){
    cout <<"Error Info < 0: if INFO = -i, the i-th argument had an illegal value"<< endl
         <<"Info = " << info<<endl;
      }
    }
    // Cblacs_barrier(ctxt, "All");
  }

  //psgesv_(n, 1, al, 1,1,idescal, ipiv, b, 1,1,idescb,  info) */
  //    psgesv_(&n, &one, al, &one,&one,idescal, ipiv, b, &one,&one,idescb,  &info);
  //pXelset http://www.netlib.org/scalapack/tools/pdelset.f


  /* CHOLESKY HERE */
  info = 0;
  MPIt1=MPI_Wtime();

  pdpotrf_("L",&ord,A_loc,&Nb,&Mb,descA,&info);

  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }

  MPIt2 = MPI_Wtime();
  MPIelapsed=MPIt2-MPIt1;
  if(mpiroot){
    std::cout<<"Cholesky MPI Run Time" << MPIelapsed<<std::endl;

    if(info == 0){
      std::cout<<"SUCCESS"<<std::endl;
    }
    if(info < 0){

      cout << "info < 0:  If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i. " << endl;
      cout<<"info = " << info << endl;
    }
    if(info > 0){
      std::cout<<"matrix is not positve definte"<<std::endl;
    }
  }

  //sanity check set global matrix to zero before it's recieved by nodes
  if(mpiroot){
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
    A_glob2[c *N + r]  = 0; 
      }
    }
  }

  /* Gather matrix */
  sendr = 0;
  for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
    sendc = 0;
    // Number of rows to be sent
    // Is this the last row block?
    int nr = Nb;
    if (N-r < Nb)
      nr = N-r;

    for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
      // Number of cols to be sent
      // Is this the last col block?
      int nc = Mb;
      if (M-c < Mb)
    nc = M-c;

      if (myrow == sendr && mycol == sendc) {
    // Send a nr-by-nc submatrix to process (sendr, sendc)
    Cdgesd2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
    recvc = (recvc+nc)%ncols;
      }

      if (mpiroot) {
    // Receive the same data
    // The leading dimension of the local matrix is nrows!
    Cdgerv2d(ctxt, nr, nc, A_glob2+N*c+r, N, sendr, sendc);
      }
    }
    if (myrow == sendr)
      recvr = (recvr+nr)%nrows;
  }
  /* Print test matrix */
  if (mpiroot) {
    if(verbose == 1){
      cout << "Matrix A test:\n";
      for (int r = 0; r < N; ++r) {
    for (int c = 0; c < M; ++c) {
      cout << setw(3) << *(A_glob2+N*c+r) << " ";
    }
    cout << endl;
      }
    }
  }

  /* Release resources */
  delete[] A_glob;
  delete[] A_glob2;
  delete[] A_loc;
  Cblacs_gridexit(ctxt);
  MPI_Finalize();
}
EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2013-01-07 20:55:11

对-我解决了这个问题。这里是您必须做的(我检查了修改后的MPI程序的结果与你的矩阵在八度中的Cholesky分解--它起作用了)。

我发现IBM提供的以下LAPACK引用比链接中的LAPACK更有帮助:lpotrf.htm

PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO )

您将传递MbNbIAJA。但是,这些参数用于在更大的矩阵中提供全局矩阵的起始行和列。只有当你有一个大矩阵并且只想要一个子矩阵的Cholesky分解时,它们才是相关的。在您的例子中,IAJA都必须是1

所以你要做的就是:

代码语言:javascript
复制
int IA = 1;
int JA = 1;
pdpotrf_("L",&ord,A_loc,&IA,&JA,descA,&info);

您还可能希望更改硬编码的int ord = 8;,使其取决于从命令行读取的值,否则稍后会遇到问题。

如上文所述,程序的输出修改如下:

代码语言:javascript
复制
Cholesky MPI Run Time0.000659943
SUCCESS
Matrix A test:
13.4907 147 140 125 132  76 126 157 
10.8964 9.70923 185 150 209 114 166 188 
10.3775 7.4077 8.33269 129 194 142 199 205 
9.26562 5.0507 -0.548194 5.59806 148  81 104 150 
9.78449 10.5451 1.72175 0.897537 1.81524 122 172 189 
5.63349 5.41911 5.20784 0.765767 0.0442447 3.63139 129 117 
9.33974 6.61543 6.36911 -2.22569 1.03941 2.48498 1.79738 181 
11.6376 6.30249 4.50561 2.28799 -0.627688 -2.17633 7.27182 0.547228 

用于比较的倍频程输出:

代码语言:javascript
复制
octave:1> A=dlmread("matrixfile4")
A =

   182   147   140   125   132    76   126   157
   147   213   185   150   209   114   166   188
   140   185   232   129   194   142   199   205
   125   150   129   143   148    81   104   150
   132   209   194   148   214   122   172   189
    76   114   142    81   122   102   129   117
   126   166   199   104   172   129   187   181
   157   188   205   150   189   117   181   259

octave:2> C=chol(A)
C =

   13.49074   10.89636   10.37749    9.26562    9.78449    5.63349    9.33974   11.63761
    0.00000    9.70923    7.40770    5.05070   10.54508    5.41911    6.61543    6.30249
    0.00000    0.00000    8.33269   -0.54819    1.72175    5.20784    6.36911    4.50561
    0.00000    0.00000    0.00000    5.59806    0.89754    0.76577   -2.22569    2.28799
    0.00000    0.00000    0.00000    0.00000    1.81524    0.04424    1.03941   -0.62769
    0.00000    0.00000    0.00000    0.00000    0.00000    3.63139    2.48498   -2.17633
    0.00000    0.00000    0.00000    0.00000    0.00000    0.00000    1.79738    7.27182
    0.00000    0.00000    0.00000    0.00000    0.00000    0.00000    0.00000    0.54723

(我先前的评论-now删除了-关于矩阵被错误复制到节点上的说法不适用,您程序的其余部分对我来说似乎不错。)

票数 7
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/14147705

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档