Actual source code: spbas_cholesky.h
  2: /*
  3:    spbas_cholesky_row_alloc:
  4:       in the data arrays, find a place where another row may be stored.
  5:       Return PETSC_ERR_MEM if there is insufficient space to store the
  6:       row, so garbage collection and/or re-allocation may be done.
  7: */
  8: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
  9: {
 11:   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
 12:   retval.values[k] = &retval.alloc_val[*n_alloc_used];
 13:   *n_alloc_used   += r_nnz;
 14:   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
 15:   return(0);
 16: }
 19: /*
 20:   spbas_cholesky_garbage_collect:
 21:      move the rows which have been calculated so far, as well as
 22:      those currently under construction, to the front of the
 23:      array, while putting them in the proper order.
 24:      When it seems necessary, enlarge the current arrays.
 26:      NB: re-allocation is being simulated using
 27:          PetscMalloc, memcpy, PetscFree, because
 28:          PetscRealloc does not seem to exist.
 30: */
 31: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
 32:                                                                                     Only the storage, not the contents of this matrix is changed in this function */
 33:                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
 34:                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
 35:                                                                                     places in the arrays: they need not be moved any more */
 36:                                               PetscInt     *n_alloc_used,   /* I/O:  */
 37:                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
 38: {
 39: /* PSEUDO-CODE:
 40:   1. Choose the appropriate size for the arrays
 41:   2. Rescue the arrays which would be lost during garbage collection
 42:   3. Reallocate and correct administration
 43:   4. Move all arrays so that they are in proper order */
 45:   PetscInt        i,j;
 46:   PetscInt        nrows         = result->nrows;
 47:   PetscInt        n_alloc_ok    =0;
 48:   PetscInt        n_alloc_ok_max=0;
 49:   PetscErrorCode  ierr;
 50:   PetscInt        need_already  = 0;
 51:   PetscInt        n_rows_ahead  =0;
 52:   PetscInt        max_need_extra= 0;
 53:   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
 54:   PetscInt        n_alloc_now     = result->n_alloc_icol;
 55:   PetscInt        *alloc_icol_old = result->alloc_icol;
 56:   PetscScalar     *alloc_val_old  = result->alloc_val;
 57:   PetscInt        *icol_rescue;
 58:   PetscScalar     *val_rescue;
 59:   PetscInt        n_rescue;
 60:   PetscInt        n_row_rescue;
 61:   PetscInt        i_here, i_last, n_copy;
 62:   const PetscReal xtra_perc = 20;
 65:   /*********************************************************
 66:   1. Choose appropriate array size
 67:   Count number of rows and memory usage which is already final */
 68:   for (i=0; i<i_row; i++)  {
 69:     n_alloc_ok     += result->row_nnz[i];
 70:     n_alloc_ok_max += max_row_nnz[i];
 71:   }
 73:   /* Count currently needed memory usage and future memory requirements
 74:     (max, predicted)*/
 75:   for (i=i_row; i<nrows; i++) {
 76:     if (!result->row_nnz[i]) {
 77:       max_need_extra += max_row_nnz[i];
 78:     } else {
 79:       need_already += max_row_nnz[i];
 80:       n_rows_ahead++;
 81:     }
 82:   }
 84:   /* Make maximal and realistic memory requirement estimates */
 85:   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
 86:   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
 88:   /* Choose array sizes */
 89:   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
 90:   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
 91:   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
 92:   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
 94:   /* If new estimate is less than what we already have,
 95:     don't reallocate, just garbage-collect */
 96:   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
 97:     n_alloc = result->n_alloc_icol;
 98:   }
100:   /* Motivate dimension choice */
101:   PetscInfo1(NULL,"   Allocating %d nonzeros: ",n_alloc);
102:   if (n_alloc_max == n_alloc_est) {
103:     PetscInfo(NULL,"this is the correct size\n");
104:   } else if (n_alloc_now >= n_alloc_est) {
105:     PetscInfo(NULL,"the current size, which seems enough\n");
106:   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
107:     PetscInfo(NULL,"the maximum estimate\n");
108:   } else {
109:     PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);
110:   }
113:   /**********************************************************
114:   2. Rescue arrays which would be lost
115:   Count how many rows and nonzeros will have to be rescued
116:   when moving all arrays in place */
117:   n_row_rescue = 0; n_rescue = 0;
118:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
119:   else {
120:     i = *n_row_alloc_ok - 1;
122:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
123:   }
125:   for (i=*n_row_alloc_ok; i<nrows; i++) {
126:     i_here = result->icols[i]-result->alloc_icol;
127:     i_last = i_here + result->row_nnz[i];
128:     if (result->row_nnz[i]>0) {
129:       if (*n_alloc_used > i_here || i_last > n_alloc) {
130:         n_rescue += result->row_nnz[i];
131:         n_row_rescue++;
132:       }
134:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
135:       else         *n_alloc_used += max_row_nnz[i];
136:     }
137:   }
139:   /* Allocate rescue arrays */
140:   PetscMalloc1(n_rescue, &icol_rescue);
141:   PetscMalloc1(n_rescue, &val_rescue);
143:   /* Rescue the arrays which need rescuing */
144:   n_row_rescue = 0; n_rescue = 0;
145:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
146:   else {
147:     i             = *n_row_alloc_ok - 1;
148:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
149:   }
151:   for (i=*n_row_alloc_ok; i<nrows; i++) {
152:     i_here = result->icols[i]-result->alloc_icol;
153:     i_last = i_here + result->row_nnz[i];
154:     if (result->row_nnz[i]>0) {
155:       if (*n_alloc_used > i_here || i_last > n_alloc) {
156:         PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);
157:         PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);
158:         n_rescue += result->row_nnz[i];
159:         n_row_rescue++;
160:       }
162:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
163:       else         *n_alloc_used += max_row_nnz[i];
164:     }
165:   }
167:   /**********************************************************
168:   3. Reallocate and correct administration */
170:   if (n_alloc != result->n_alloc_icol) {
171:     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
173:     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
175:         Allocate new icol-data, copy old contents */
176:     PetscMalloc1(n_alloc, &result->alloc_icol);
177:     PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);
179:     /* Update administration, Reset pointers to new arrays  */
180:     result->n_alloc_icol = n_alloc;
181:     for (i=0; i<nrows; i++) {
182:       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
183:       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
184:     }
186:     /* Delete old array */
187:     PetscFree(alloc_icol_old);
189:     /* Allocate new value-data, copy old contents */
190:     PetscMalloc1(n_alloc, &result->alloc_val);
191:     PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);
193:     /* Update administration, Reset pointers to new arrays  */
194:     result->n_alloc_val = n_alloc;
195:     for (i=0; i<nrows; i++) {
196:       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
197:     }
199:     /* Delete old array */
200:     PetscFree(alloc_val_old);
201:   }
204:   /*********************************************************
205:   4. Copy all the arrays to their proper places */
206:   n_row_rescue = 0; n_rescue = 0;
207:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
208:   else {
209:     i = *n_row_alloc_ok - 1;
211:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
212:   }
214:   for (i=*n_row_alloc_ok; i<nrows; i++) {
215:     i_here = result->icols[i]-result->alloc_icol;
216:     i_last = i_here + result->row_nnz[i];
218:     result->icols[i] = result->alloc_icol + *n_alloc_used;
219:     result->values[i]= result->alloc_val  + *n_alloc_used;
221:     if (result->row_nnz[i]>0) {
222:       if (*n_alloc_used > i_here || i_last > n_alloc) {
223:         PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);
224:         PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);
226:         n_rescue += result->row_nnz[i];
227:         n_row_rescue++;
228:       } else {
229:         for (j=0; j<result->row_nnz[i]; j++) {
230:           result->icols[i][j]  = result->alloc_icol[i_here+j];
231:           result->values[i][j] = result->alloc_val[i_here+j];
232:         }
233:       }
234:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
235:       else         *n_alloc_used += max_row_nnz[i];
236:     }
237:   }
239:   /* Delete the rescue arrays */
240:   PetscFree(icol_rescue);
241:   PetscFree(val_rescue);
243:   *n_row_alloc_ok = i_row;
244:   return(0);
245: }
247: /*
248:   spbas_incomplete_cholesky:
249:      incomplete Cholesky decomposition of a square, symmetric,
250:      positive definite matrix.
252:      In case negative diagonals are encountered, function returns
253:      NEGATIVE_DIAGONAL. When this happens, call this function again
254:      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
255:      droptol
256: */
257: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
258: {
259:   PetscInt        jL;
260:   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
261:   PetscInt        *ai=a->i,*aj=a->j;
262:   MatScalar       *aa=a->a;
263:   PetscInt        nrows, ncols;
264:   PetscInt        *max_row_nnz;
265:   PetscErrorCode  ierr;
266:   spbas_matrix    retval;
267:   PetscScalar     *diag;
268:   PetscScalar     *val;
269:   PetscScalar     *lvec;
270:   PetscScalar     epsdiag;
271:   PetscInt        i,j,k;
272:   const PetscBool do_values = PETSC_TRUE;
273:   PetscInt        *r1_icol;
274:   PetscScalar     *r1_val;
275:   PetscInt        *r_icol;
276:   PetscInt        r_nnz;
277:   PetscScalar     *r_val;
278:   PetscInt        *A_icol;
279:   PetscInt        A_nnz;
280:   PetscScalar     *A_val;
281:   PetscInt        *p_icol;
282:   PetscInt        p_nnz;
283:   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
284:   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */
287:   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
288:   MatGetSize(A, &nrows, &ncols);
289:   MatGetTrace(A, &epsdiag);
291:   epsdiag *= epsdiag_in / nrows;
293:   PetscInfo2(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);
295:   if (droptol<1e-10) droptol=1e-10;
297:   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
299:   retval.nrows        = nrows;
300:   retval.ncols        = nrows;
301:   retval.nnz          = pattern.nnz/10;
302:   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
303:   retval.block_data   = PETSC_TRUE;
305:   spbas_allocate_pattern(&retval, do_values);
306:   PetscArrayzero(retval.row_nnz, nrows);
307:   spbas_allocate_data(&retval);
308:   retval.nnz = 0;
310:   PetscMalloc1(nrows, &diag);
311:   PetscCalloc1(nrows, &val);
312:   PetscCalloc1(nrows, &lvec);
313:   PetscCalloc1(nrows, &max_row_nnz);
315:   /* Check correct format of sparseness pattern */
316:   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
318:   /* Count the nonzeros on transpose of pattern */
319:   for (i = 0; i<nrows; i++)  {
320:     p_nnz  = pattern.row_nnz[i];
321:     p_icol = pattern.icols[i];
322:     for (j=0; j<p_nnz; j++)  {
323:       max_row_nnz[i+p_icol[j]]++;
324:     }
325:   }
327:   /* Calculate rows of L */
328:   for (i = 0; i<nrows; i++)  {
329:     p_nnz  = pattern.row_nnz[i];
330:     p_icol = pattern.icols[i];
332:     r_nnz  = retval.row_nnz[i];
333:     r_icol = retval.icols[i];
334:     r_val  = retval.values[i];
335:     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
336:     A_icol = &aj[ai[rip[i]]];
337:     A_val  = &aa[ai[rip[i]]];
339:     /* Calculate  val += A(i,i:n)'; */
340:     for (j=0; j<A_nnz; j++) {
341:       k = riip[A_icol[j]];
342:       if (k>=i) val[k] = A_val[j];
343:     }
345:     /*  Add regularization */
346:     val[i] += epsdiag;
348:     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
349:         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
350:     for (j=0; j<r_nnz; j++)  {
351:       k       = r_icol[j];
352:       lvec[k] = diag[k] * r_val[j];
353:       val[i] -= r_val[j] * lvec[k];
354:     }
356:     /* Calculate the new diagonal */
357:     diag[i] = val[i];
358:     if (PetscRealPart(diag[i])<droptol) {
359:       PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
360:       PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);
362:       /* Delete the whole matrix at once. */
363:       spbas_delete(retval);
364:       return NEGATIVE_DIAGONAL;
365:     }
367:     /* If necessary, allocate arrays */
368:     if (r_nnz==0) {
369:       spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
370:       if (ierr == PETSC_ERR_MEM) {
371:         spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
372:         spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
373:       } else 
374:       r_icol = retval.icols[i];
375:       r_val  = retval.values[i];
376:     }
378:     /* Now, fill in */
379:     r_icol[r_nnz] = i;
380:     r_val [r_nnz] = 1.0;
381:     r_nnz++;
382:     retval.row_nnz[i]++;
384:     retval.nnz += r_nnz;
386:     /* Calculate
387:         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
388:     for (j=1; j<p_nnz; j++)  {
389:       k       = i+p_icol[j];
390:       r1_icol = retval.icols[k];
391:       r1_val  = retval.values[k];
392:       for (jL=0; jL<retval.row_nnz[k]; jL++) {
393:         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
394:       }
395:       val[k] /= diag[i];
397:       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
398:         /* If necessary, allocate arrays */
399:         if (retval.row_nnz[k]==0) {
400:           spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
401:           if (ierr == PETSC_ERR_MEM) {
402:             spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
403:             spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
404:             r_icol = retval.icols[i];
405:           } else 
406:         }
408:         retval.icols[k][retval.row_nnz[k]]  = i;
409:         retval.values[k][retval.row_nnz[k]] = val[k];
410:         retval.row_nnz[k]++;
411:       }
412:       val[k] = 0;
413:     }
415:     /* Erase the values used in the work arrays */
416:     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
417:   }
419:   ierr=PetscFree(lvec);
420:   ierr=PetscFree(val);
422:   spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
423:   PetscFree(max_row_nnz);
425:   /* Place the inverse of the diagonals in the matrix */
426:   for (i=0; i<nrows; i++) {
427:     r_nnz = retval.row_nnz[i];
429:     retval.values[i][r_nnz-1] = 1.0 / diag[i];
430:     for (j=0; j<r_nnz-1; j++) {
431:       retval.values[i][j] *= -1;
432:     }
433:   }
434:   PetscFree(diag);
435:   *matrix_L = retval;
436:   return(0);
437: }