23 #include "matrixlapack.h"    24 #include "Lanczos_EigenValue.h"    25 #include "wrapperMPI.h"    57   unsigned long int i_max_tmp;
    61   double complex temp1, temp2;
    62   double complex cbeta1;
    63   double E[5], ebefor, E_target;
    70   int int_i, int_j, 
mfint[7];
    74   i_max = 
X->Check.idim_max;
    75   k_exct = 
X->Def.k_exct;
    76   unsigned long int liLanczosStp;
    77   liLanczosStp = 
X->Def.Lanczos_max;
    78   unsigned long int liLanczosStp_vec=0;
    80     if (
X->Def.iReStart == RESTART_INOUT || 
X->Def.iReStart == RESTART_IN){
    82         fprintf(
stdoutMPI, 
"  Error: Fail to read initial vectors\n");
    87         fprintf(
stdoutMPI, 
"  Error: Fail to read TMcomponents\n");
    90       if(liLanczosStp_vec !=liLanczosStp){
    91         fprintf(
stdoutMPI, 
"  Error: Input files for vector and TMcomponents are incoorect.\n");
    92         fprintf(
stdoutMPI, 
"  Error: Input vector %ld th stps, TMcomponents  %ld th stps.\n", liLanczosStp_vec, liLanczosStp);
    95       X->Def.Lanczos_restart=liLanczosStp;
    98       liLanczosStp = liLanczosStp+
X->Def.Lanczos_max;
    99       alpha1=
alpha[
X->Def.Lanczos_restart];
   100       beta1=
beta[
X->Def.Lanczos_restart];
   112       alpha1 = creal(
X->Large.prdct);
   117 #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1)   118       for (i = 1; i <= i_max; i++) {
   119         cbeta1 += conj(
v0[i] - alpha1 * 
v1[i]) * (
v0[i] - alpha1 * 
v1[i]);
   122       beta1 = creal(cbeta1);
   126       liLanczosStp = 
X->Def.Lanczos_max;
   127       X->Def.Lanczos_restart =1;
   134   if (i_max_tmp < liLanczosStp) {
   135     liLanczosStp = i_max_tmp;
   137   if (i_max_tmp < X->Def.LanczosTarget) {
   138     liLanczosStp = i_max_tmp;
   140   if (i_max_tmp == 1) {
   146     X->Phys.Target_energy = E[k_exct];
   148     fprintf(
stdoutMPI, 
"  LanczosStep  E[1] \n");
   149     fprintf(
stdoutMPI, 
"  stp=%d %.10lf \n", stp, E[1]);
   152   fprintf(
stdoutMPI, 
"  LanczosStep  E[1] E[2] E[3] E[4] Target:E[%d] E_Max/Nsite\n", 
X->Def.LanczosTarget + 1);
   153   for (stp = 
X->Def.Lanczos_restart+1; stp <= liLanczosStp; stp++) {
   154 #pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1)   155     for (i = 1; i <= i_max; i++) {
   157       temp2 = (
v0[i] - alpha1 * 
v1[i]) / beta1;
   158       v0[i] = -beta1 * temp1;
   166     alpha1 = creal(
X->Large.prdct);
   170 #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1)   171     for (i = 1; i <= i_max; i++) {
   172       cbeta1 += conj(
v0[i] - alpha1 * 
v1[i]) * (
v0[i] - alpha1 * 
v1[i]);
   175     beta1 = creal(cbeta1);
   179     Target = 
X->Def.LanczosTarget;
   182       d_malloc2(tmp_mat, stp, stp);
   183       d_malloc1(tmp_E, stp + 1);
   185       for (int_i = 0; int_i < stp; int_i++) {
   186         for (int_j = 0; int_j < stp; int_j++) {
   187           tmp_mat[int_i][int_j] = 0.0;
   190       tmp_mat[0][0] = 
alpha[1];
   191       tmp_mat[0][1] = 
beta[1];
   192       tmp_mat[1][0] = 
beta[1];
   193       tmp_mat[1][1] = 
alpha[2];
   198         E_target = tmp_E[Target];
   201       d_free1(tmp_E, stp + 1);
   202       d_free2(tmp_mat, stp, stp);
   206       fprintf(fp, 
"LanczosStep  E[1] E[2] E[3] E[4] Target:E[%d] E_Max/Nsite\n", Target + 1);
   208         fprintf(
stdoutMPI, 
"  stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx  %.10lf xxxxxxxxx \n", stp, E[1], E[2],
   210         fprintf(fp, 
"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx %.10lf xxxxxxxxx \n", stp, E[1], E[2], E_target);
   212         fprintf(
stdoutMPI, 
"  stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx xxxxxxxxx \n", stp, E[1], E[2]);
   213         fprintf(fp, 
"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx xxxxxxxxx \n", stp, E[1], E[2]);
   223       d_malloc2(tmp_mat, stp, stp);
   224       d_malloc1(tmp_E, stp + 1);
   226       for (int_i = 0; int_i < stp; int_i++) {
   227         for (int_j = 0; int_j < stp; int_j++) {
   228           tmp_mat[int_i][int_j] = 0.0;
   231       tmp_mat[0][0] = 
alpha[1];
   232       tmp_mat[0][1] = 
beta[1];
   233       for (int_i = 1; int_i < stp - 1; int_i++) {
   234         tmp_mat[int_i][int_i] = 
alpha[int_i + 1];
   235         tmp_mat[int_i][int_i + 1] = 
beta[int_i + 1];
   236         tmp_mat[int_i][int_i - 1] = 
beta[int_i];
   238       tmp_mat[int_i][int_i] = 
alpha[int_i + 1];
   239       tmp_mat[int_i][int_i - 1] = 
beta[int_i];
   247       E[0] = tmp_E[stp - 1];
   249         E_target = tmp_E[Target];
   251       d_free1(tmp_E, stp + 1);
   252       d_free2(tmp_mat, stp, stp);
   254         fprintf(
stdoutMPI, 
"  stp = %d %.10lf %.10lf %.10lf %.10lf %.10lf %.10lf\n", stp, E[1], E[2], E[3], E[4],
   255                 E_target, E[0] / (
double) 
X->Def.NsiteMPI);
   256         fprintf(fp, 
"stp=%d %.10lf %.10lf %.10lf %.10lf %.10lf %.10lf\n", stp, E[1], E[2], E[3], E[4], E_target,
   257                 E[0] / (
double) 
X->Def.NsiteMPI);
   259         fprintf(
stdoutMPI, 
"  stp = %d %.10lf %.10lf %.10lf %.10lf xxxxxxxxx %.10lf\n", stp, E[1], E[2], E[3], E[4],
   260                 E[0] / (
double) 
X->Def.NsiteMPI);
   261         fprintf(fp, 
"stp=%d %.10lf %.10lf %.10lf %.10lf xxxxxxxxx %.10lf\n", stp, E[1], E[2], E[3], E[4],
   262                 E[0] / (
double) 
X->Def.NsiteMPI);
   266         if (fabs((E_target - ebefor) / E_target) < 
eps_Lanczos || fabs(
beta[stp]) < pow(10.0, -14)) {
   272           d_malloc1(tmp_E, stp + 1);
   277           X->Phys.Target_energy = E_target;
   278           X->Phys.Target_CG_energy = tmp_E[k_exct]; 
   280           d_free1(tmp_E, stp + 1);
   288   if (
X->Def.iReStart == RESTART_INOUT ||
X->Def.iReStart == RESTART_OUT ){
   289     if(stp != 
X->Def.Lanczos_restart+2) { 
   299     fprintf(
stdoutMPI, 
"  Lanczos Eigenvalue is not converged in this process.\n");
   325         double complex *tmp_v1,
   326         unsigned long int *liLanczos_step
   332   i_max = 
X->Check.idim_max;
   334   unsigned long int i_max_tmp;
   335   double beta1, alpha1; 
   336   double complex temp1, temp2;
   337   double complex cbeta1;
   345   if (i_max_tmp < *liLanczos_step || i_max_tmp < X->Def.LanczosTarget) {
   346     *liLanczos_step = i_max_tmp;
   349   if (
X->Def.Lanczos_restart == 0) { 
   350 #pragma omp parallel for default(none) private(i) shared(v0, v1, tmp_v1) firstprivate(i_max)   351     for (i = 1; i <= i_max; i++) {
   358     alpha1 = creal(
X->Large.prdct);
   361     fprintf(
stdoutMPI, 
"  Step / Step_max alpha beta \n");
   363 #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1)   364     for (i = 1; i <= i_max; i++) {
   365       cbeta1 += conj(
v0[i] - alpha1 * 
v1[i]) * (
v0[i] - alpha1 * 
v1[i]);
   368     beta1 = creal(cbeta1);
   371     X->Def.Lanczos_restart = 1;
   373     alpha1 = 
alpha[
X->Def.Lanczos_restart];
   374     beta1 = 
beta[
X->Def.Lanczos_restart];
   377   for (stp = 
X->Def.Lanczos_restart + 1; stp <= *liLanczos_step; stp++) {
   379     if (fabs(_beta[stp - 1]) < pow(10.0, -14)) {
   380       *liLanczos_step = stp - 1;
   384 #pragma omp parallel for default(none) private(i, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1)   385     for (i = 1; i <= i_max; i++) {
   387       temp2 = (
v0[i] - alpha1 * 
v1[i]) / beta1;
   388       v0[i] = -beta1 * temp1;
   394     alpha1 = creal(
X->Large.prdct);
   395     _alpha[stp] = alpha1;
   398 #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1)   399     for (i = 1; i <= i_max; i++) {
   400       cbeta1 += conj(
v0[i] - alpha1 * 
v1[i]) * (
v0[i] - alpha1 * 
v1[i]);
   403     beta1 = creal(cbeta1);
   407       fprintf(
stdoutMPI, 
"  stp = %d / %lu %.10lf  %.10lf \n", stp,  *liLanczos_step, alpha1, beta1);
   430   unsigned long int i_max;
   432   fprintf(
stdoutMPI, 
"  Start: Input vectors for recalculation.\n");
   438   byte_size = fread(liLanczosStp_vec, 
sizeof(*liLanczosStp_vec),1,fp);
   439   byte_size = fread(&i_max, 
sizeof(
long int), 1, fp);
   440   if(i_max != 
X->Check.idim_max){
   441     fprintf(stderr, 
"Error: A size of Inputvector is incorrect.\n");
   444   byte_size = fread(_v0, 
sizeof(complex 
double), 
X->Check.idim_max + 1, fp);
   445   byte_size = fread(_v1, 
sizeof(complex 
double), 
X->Check.idim_max + 1, fp);
   447   fprintf(
stdoutMPI, 
"  End:   Input vectors for recalculation.\n");
   449   if (byte_size == 0) printf(
"byte_size: %d \n", (
int)byte_size);
   464                         double complex* tmp_v0,
   465                         double complex *tmp_v1,
   466                         unsigned long int liLanczosStp_vec){
   470   fprintf(
stdoutMPI, 
"    Start: Output vectors for recalculation.\n");
   477   fwrite(&liLanczosStp_vec, 
sizeof(liLanczosStp_vec),1,fp);
   478   fwrite(&
X->Check.idim_max, 
sizeof(
X->Check.idim_max),1,fp);
   479   fwrite(tmp_v0, 
sizeof(complex 
double),
X->Check.idim_max+1, fp);
   480   fwrite(tmp_v1, 
sizeof(complex 
double),
X->Check.idim_max+1, fp);
   483   fprintf(
stdoutMPI, 
"    End:   Output vectors for recalculation.\n");
   498   long int i, iv, i_max;
   499   unsigned long int i_max_tmp, sum_i_max;
   504   double complex cdnorm;
   505   long unsigned int u_long_i;
   508   i_max = 
X->Check.idim_max;
   510     if(
X->Def.iFlgMPI==0) {
   514       sum_i_max =
X->Check.idim_max;
   516     X->Large.iv = (sum_i_max / 2 + 
X->Def.initial_iv) % sum_i_max + 1;
   518     fprintf(
stdoutMPI, 
"  initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d \n\n", 
initial_mode, iv, i_max,
   520 #pragma omp parallel for default(none) private(i) shared(tmp_v0, tmp_v1) firstprivate(i_max)   521     for (i = 1; i <= i_max; i++) {
   527     if(
X->Def.iFlgMPI==0) {
   528       for (iproc = 0; iproc < 
nproc; iproc++) {
   530         if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {
   532             tmp_v1[iv - sum_i_max + 1] = 1.0;
   533             if (
X->Def.iInitialVecType == 0) {
   534               tmp_v1[iv - sum_i_max + 1] += 1.0 * I;
   535               tmp_v1[iv - sum_i_max + 1] /= sqrt(2.0);
   540         sum_i_max += i_max_tmp;
   545       tmp_v1[iv + 1] = 1.0;
   546       if (
X->Def.iInitialVecType == 0) {
   547         tmp_v1[iv + 1] += 1.0 * I;
   548         tmp_v1[iv + 1] /= sqrt(2.0);
   553     iv = 
X->Def.initial_iv;
   554     fprintf(
stdoutMPI, 
"  initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n\n", 
initial_mode, iv, i_max,
   556 #pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \   557             shared(tmp_v0, tmp_v1, iv, X, nthreads, myrank) firstprivate(i_max)   561       for (i = 1; i <= i_max; i++) {
   568       mythread = omp_get_thread_num();
   572       if(
X->Def.iFlgMPI==0) {
   576         u_long_i = 123432 + labs(iv)+ mythread ;
   578       dsfmt_init_gen_rand(&dsfmt, u_long_i);
   580       if (
X->Def.iInitialVecType == 0) {
   582         for (i = 1; i <= i_max; i++)
   583           tmp_v1[i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) +
   584                       2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) * I;
   587         for (i = 1; i <= i_max; i++)
   588           tmp_v1[i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5);
   594 #pragma omp parallel for default(none) private(i) shared(tmp_v1, i_max) reduction(+: cdnorm)   595     for (i = 1; i <= i_max; i++) {
   596       cdnorm += conj(tmp_v1[i]) * tmp_v1[i];
   598     if(
X->Def.iFlgMPI==0) {
   601     dnorm = creal(cdnorm);
   603 #pragma omp parallel for default(none) private(i) shared(tmp_v1) firstprivate(i_max, dnorm)   604     for (i = 1; i <= i_max; i++) {
   605       tmp_v1[i] = tmp_v1[i] / dnorm;
   624         unsigned long int *_i_max,
   630   unsigned long int idx, i, ivec;
   631   unsigned long int i_max;
   640   fgetsMPI(ctmp, 
sizeof(ctmp)/
sizeof(
char), fp);
   641   sscanf(ctmp,
"%ld \n", &i_max);
   642   if (
X->Def.LanczosTarget > 
X->Def.nvec) {
   643     ivec = 
X->Def.LanczosTarget + 1;
   646     ivec =
X->Def.nvec + 1;
   650     alpha = (
double *) realloc(
alpha, 
sizeof(
double) * (i_max + 
X->Def.Lanczos_max + 1));
   651     beta = (
double *) realloc(
beta, 
sizeof(
double) * (i_max + 
X->Def.Lanczos_max + 1));
   652     for (i = 0; i <  ivec; i++) {
   653       vec[i] = (complex 
double *) realloc(
vec[i], (i_max + 
X->Def.Lanczos_max + 1) * 
sizeof(complex double));
   657     alpha=(
double*)realloc(
alpha, 
sizeof(
double)*(i_max + 1));
   658     beta=(
double*)realloc(
beta, 
sizeof(
double)*(i_max + 1));
   659     for (i = 0; i < ivec; i++) {
   660       vec[i] = (complex 
double *) realloc(
vec[i], (i_max + 
X->Def.Lanczos_max + 1) * 
sizeof(complex double));
   667   fgetsMPI(ctmp, 
sizeof(ctmp)/
sizeof(
char), fp);
   668   sscanf(ctmp,
"%lf \n", &dnorm);
   669   while(
fgetsMPI(ctmp, 
sizeof(ctmp)/
sizeof(
char), fp) != NULL){
   670     sscanf(ctmp,
"%lf %lf \n", &
alpha[idx], &
beta[idx]);
   707   fprintf(fp, 
"%d \n",liLanczosStp);
   708   fprintf(fp, 
"%.10lf \n",_dnorm);
   709   for( i = 1 ; i <= liLanczosStp; i++){
   710     fprintf(fp,
"%.10lf %.10lf \n", _alpha[i], _beta[i]);
 int childfopenALL(const char *_cPathChild, const char *_cmode, FILE **_fp)
All processes open file in output/ directory. 
int mltply(struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. . First, the calculation of diago...
void StartTimer(int n)
function for initializing elapse time [start] 
double complex SumMPI_dc(double complex norm)
MPI wrapper function to obtain sum of Double complex across processes. 
const char * cLogLanczos_EigenValueNotConverged
unsigned long int BcastMPI_li(int root, unsigned long int idim)
MPI wrapper function to broadcast unsigned long integer across processes. 
void SetInitialVector(struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Set initial vector to start the calculation for Lanczos method. . 
void StopTimer(int n)
function for calculating elapse time [elapse time=StartTimer-StopTimer] 
int Lanczos_GetTridiagonalMatrixComponents(struct BindStruct *X, double *_alpha, double *_beta, double complex *tmp_v1, unsigned long int *liLanczos_step)
Calculate tridiagonal matrix components by Lanczos method. 
int childfopenMPI(const char *_cPathChild, const char *_cmode, FILE **_fp)
Only the root process open file in output/ directory. 
const char * c_Lanczos_SpectrumStep
const char * cLanczos_EigenValueFinish
const char * cFileNameOutputRestartVec
int DSEVvalue(int xNsize, double **A, double *r)
obtain eigenvalues of real symmetric A 
const char * cFileNameLanczosStep
int nthreads
Number of Threads, defined in InitializeMPI() 
int nproc
Number of processors, defined in InitializeMPI() 
int OutputLanczosVector(struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1, unsigned long int liLanczosStp_vec)
Output initial vectors for the restart calculation. 
const char * c_OutputSpectrumRecalcvecEnd
const char * c_InputSpectrumRecalcvecStart
const char * cLogLanczos_EigenValueStart
const char * cLogLanczos_EigenValueEnd
const char * cLanczos_EigenValueStart
const char * cFileNameTridiagonalMatrixComponents
static unsigned long int mfint[7]
int ReadInitialVector(struct BindStruct *X, double complex *_v0, double complex *_v1, unsigned long int *liLanczosStp_vec)
Read initial vectors for the restart calculation. 
const char * cFileNameTimeKeep
const char * c_InputSpectrumRecalcvecEnd
void vec12(double alpha[], double beta[], unsigned int ndim, double tmp_E[], struct BindStruct *X)
Diagonalize a tri-diagonal matrix and store eigenvectors into vec. 
int OutputTMComponents(struct BindStruct *X, double *_alpha, double *_beta, double _dnorm, int liLanczosStp)
Output tridiagonal matrix components obtained by the Lanczos method. 
int Lanczos_EigenValue(struct BindStruct *X)
Main function for calculating eigen values by Lanczos method. The energy convergence is judged by the...
const char * cLanczos_EigenValueStep
int myrank
Process ID, defined in InitializeMPI() 
char * fgetsMPI(char *InputString, int maxcount, FILE *fp)
MPI file I/O (get a line, fgets) wrapper. Only the root node (myrank = 0) reads and broadcast string...
int ReadTMComponents(struct BindStruct *X, double *_dnorm, unsigned long int *_i_max, int iFlg)
Read tridiagonal matrix components obtained by the Lanczos method. . 
const char * c_OutputSpectrumRecalcvecStart
unsigned long int SumMPI_li(unsigned long int idim)
MPI wrapper function to obtain sum of unsigned long integer across processes. 
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log. 
int TimeKeeper(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType)
Functions for writing a time log. 
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI()