dcgn.f 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. *DECK DCGN
  2. SUBROUTINE DCGN (N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
  3. + MSOLVE, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, Z, P, ATP,
  4. + ATZ, DZ, ATDZ, RWORK, IWORK)
  5. C***BEGIN PROLOGUE DCGN
  6. C***PURPOSE Preconditioned CG Sparse Ax=b Solver for Normal Equations.
  7. C Routine to solve a general linear system Ax = b using the
  8. C Preconditioned Conjugate Gradient method applied to the
  9. C normal equations AA'y = b, x=A'y.
  10. C***LIBRARY SLATEC (SLAP)
  11. C***CATEGORY D2A4, D2B4
  12. C***TYPE DOUBLE PRECISION (SCGN-S, DCGN-D)
  13. C***KEYWORDS ITERATIVE PRECONDITION, NON-SYMMETRIC LINEAR SYSTEM SOLVE,
  14. C NORMAL EQUATIONS., SLAP, SPARSE
  15. C***AUTHOR Greenbaum, Anne, (Courant Institute)
  16. C Seager, Mark K., (LLNL)
  17. C Lawrence Livermore National Laboratory
  18. C PO BOX 808, L-60
  19. C Livermore, CA 94550 (510) 423-3141
  20. C seager@llnl.gov
  21. C***DESCRIPTION
  22. C
  23. C *Usage:
  24. C INTEGER N, NELT, IA(NELT), JA(NELT), ISYM, ITOL, ITMAX
  25. C INTEGER ITER, IERR, IUNIT, IWORK(USER DEFINED)
  26. C DOUBLE PRECISION B(N), X(N), A(NELT), TOL, ERR, R(N), Z(N)
  27. C DOUBLE PRECISION P(N), ATP(N), ATZ(N), DZ(N), ATDZ(N)
  28. C DOUBLE PRECISION RWORK(USER DEFINED)
  29. C EXTERNAL MATVEC, MTTVEC, MSOLVE
  30. C
  31. C CALL DCGN(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
  32. C $ MSOLVE, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R,
  33. C $ Z, P, ATP, ATZ, DZ, ATDZ, RWORK, IWORK)
  34. C
  35. C *Arguments:
  36. C N :IN Integer
  37. C Order of the Matrix.
  38. C B :IN Double Precision B(N).
  39. C Right-hand side vector.
  40. C X :INOUT Double Precision X(N).
  41. C On input X is your initial guess for solution vector.
  42. C On output X is the final approximate solution.
  43. C NELT :IN Integer.
  44. C Number of Non-Zeros stored in A.
  45. C IA :IN Integer IA(NELT).
  46. C JA :IN Integer JA(NELT).
  47. C A :IN Double Precision A(NELT).
  48. C These arrays contain the matrix data structure for A.
  49. C It could take any form. See "Description", below,
  50. C for more details.
  51. C ISYM :IN Integer.
  52. C Flag to indicate symmetric storage format.
  53. C If ISYM=0, all non-zero entries of the matrix are stored.
  54. C If ISYM=1, the matrix is symmetric, and only the upper
  55. C or lower triangle of the matrix is stored.
  56. C MATVEC :EXT External.
  57. C Name of a routine which performs the matrix vector multiply
  58. C y = A*X given A and X. The name of the MATVEC routine must
  59. C be declared external in the calling program. The calling
  60. C sequence to MATVEC is:
  61. C CALL MATVEC( N, X, Y, NELT, IA, JA, A, ISYM )
  62. C Where N is the number of unknowns, Y is the product A*X
  63. C upon return X is an input vector, NELT is the number of
  64. C non-zeros in the SLAP-Column IA, JA, A storage for the matrix
  65. C A. ISYM is a flag which, if non-zero, denotes that A is
  66. C symmetric and only the lower or upper triangle is stored.
  67. C MTTVEC :EXT External.
  68. C Name of a routine which performs the matrix transpose vector
  69. C multiply y = A'*X given A and X (where ' denotes transpose).
  70. C The name of the MTTVEC routine must be declared external in
  71. C the calling program. The calling sequence to MTTVEC is the
  72. C same as that for MATVEC, viz.:
  73. C CALL MTTVEC( N, X, Y, NELT, IA, JA, A, ISYM )
  74. C Where N is the number of unknowns, Y is the product A'*X
  75. C upon return X is an input vector, NELT is the number of
  76. C non-zeros in the SLAP-Column IA, JA, A storage for the matrix
  77. C A. ISYM is a flag which, if non-zero, denotes that A is
  78. C symmetric and only the lower or upper triangle is stored.
  79. C MSOLVE :EXT External.
  80. C Name of a routine which solves a linear system MZ = R for
  81. C Z given R with the preconditioning matrix M (M is supplied via
  82. C RWORK and IWORK arrays). The name of the MSOLVE routine must
  83. C be declared external in the calling program. The calling
  84. C sequence to MSOLVE is:
  85. C CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
  86. C Where N is the number of unknowns, R is the right-hand side
  87. C vector and Z is the solution upon return. NELT, IA, JA, A and
  88. C ISYM are defined as above. RWORK is a double precision array
  89. C that can be used to pass necessary preconditioning information
  90. C and/or workspace to MSOLVE. IWORK is an integer work array
  91. C for the same purpose as RWORK.
  92. C ITOL :IN Integer.
  93. C Flag to indicate type of convergence criterion.
  94. C If ITOL=1, iteration stops when the 2-norm of the residual
  95. C divided by the 2-norm of the right-hand side is less than TOL.
  96. C If ITOL=2, iteration stops when the 2-norm of M-inv times the
  97. C residual divided by the 2-norm of M-inv times the right hand
  98. C side is less than TOL, where M-inv is the inverse of the
  99. C diagonal of A.
  100. C ITOL=11 is often useful for checking and comparing different
  101. C routines. For this case, the user must supply the "exact"
  102. C solution or a very accurate approximation (one with an error
  103. C much less than TOL) through a common block,
  104. C COMMON /DSLBLK/ SOLN( )
  105. C If ITOL=11, iteration stops when the 2-norm of the difference
  106. C between the iterative approximation and the user-supplied
  107. C solution divided by the 2-norm of the user-supplied solution
  108. C is less than TOL. Note that this requires the user to set up
  109. C the "COMMON /DSLBLK/ SOLN(LENGTH)" in the calling routine.
  110. C The routine with this declaration should be loaded before the
  111. C stop test so that the correct length is used by the loader.
  112. C This procedure is not standard Fortran and may not work
  113. C correctly on your system (although it has worked on every
  114. C system the authors have tried). If ITOL is not 11 then this
  115. C common block is indeed standard Fortran.
  116. C TOL :INOUT Double Precision.
  117. C Convergence criterion, as described above. (Reset if IERR=4.)
  118. C ITMAX :IN Integer.
  119. C Maximum number of iterations.
  120. C ITER :OUT Integer.
  121. C Number of iterations required to reach convergence, or
  122. C ITMAX+1 if convergence criterion could not be achieved in
  123. C ITMAX iterations.
  124. C ERR :OUT Double Precision.
  125. C Error estimate of error in final approximate solution, as
  126. C defined by ITOL.
  127. C IERR :OUT Integer.
  128. C Return error flag.
  129. C IERR = 0 => All went well.
  130. C IERR = 1 => Insufficient space allocated for WORK or IWORK.
  131. C IERR = 2 => Method failed to converge in ITMAX steps.
  132. C IERR = 3 => Error in user input.
  133. C Check input values of N, ITOL.
  134. C IERR = 4 => User error tolerance set too tight.
  135. C Reset to 500*D1MACH(3). Iteration proceeded.
  136. C IERR = 5 => Preconditioning matrix, M, is not positive
  137. C definite. (r,z) < 0.
  138. C IERR = 6 => Matrix A is not positive definite. (p,Ap) < 0.
  139. C IUNIT :IN Integer.
  140. C Unit number on which to write the error at each iteration,
  141. C if this is desired for monitoring convergence. If unit
  142. C number is 0, no writing will occur.
  143. C R :WORK Double Precision R(N).
  144. C Z :WORK Double Precision Z(N).
  145. C P :WORK Double Precision P(N).
  146. C ATP :WORK Double Precision ATP(N).
  147. C ATZ :WORK Double Precision ATZ(N).
  148. C DZ :WORK Double Precision DZ(N).
  149. C ATDZ :WORK Double Precision ATDZ(N).
  150. C Double Precision arrays used for workspace.
  151. C RWORK :WORK Double Precision RWORK(USER DEFINED).
  152. C Double Precision array that can be used by MSOLVE.
  153. C IWORK :WORK Integer IWORK(USER DEFINED).
  154. C Integer array that can be used by MSOLVE.
  155. C
  156. C *Description:
  157. C This routine applies the preconditioned conjugate gradient
  158. C (PCG) method to a non-symmetric system of equations Ax=b. To
  159. C do this the normal equations are solved:
  160. C AA' y = b, where x = A'y.
  161. C In PCG method the iteration count is determined by condition
  162. C -1
  163. C number of the matrix (M A). In the situation where the
  164. C normal equations are used to solve a non-symmetric system
  165. C the condition number depends on AA' and should therefore be
  166. C much worse than that of A. This is the conventional wisdom.
  167. C When one has a good preconditioner for AA' this may not hold.
  168. C The latter is the situation when DCGN should be tried.
  169. C
  170. C If one is trying to solve a symmetric system, SCG should be
  171. C used instead.
  172. C
  173. C This routine does not care what matrix data structure is
  174. C used for A and M. It simply calls MATVEC, MTTVEC and MSOLVE
  175. C routines, with arguments as described above. The user could
  176. C write any type of structure, and appropriate MATVEC, MTTVEC
  177. C and MSOLVE routines. It is assumed that A is stored in the
  178. C IA, JA, A arrays in some fashion and that M (or INV(M)) is
  179. C stored in IWORK and RWORK) in some fashion. The SLAP
  180. C routines SSDCGN and SSLUCN are examples of this procedure.
  181. C
  182. C Two examples of matrix data structures are the: 1) SLAP
  183. C Triad format and 2) SLAP Column format.
  184. C
  185. C =================== S L A P Triad format ===================
  186. C
  187. C In this format only the non-zeros are stored. They may
  188. C appear in *ANY* order. The user supplies three arrays of
  189. C length NELT, where NELT is the number of non-zeros in the
  190. C matrix: (IA(NELT), JA(NELT), A(NELT)). For each non-zero
  191. C the user puts the row and column index of that matrix
  192. C element in the IA and JA arrays. The value of the non-zero
  193. C matrix element is placed in the corresponding location of
  194. C the A array. This is an extremely easy data structure to
  195. C generate. On the other hand it is not too efficient on
  196. C vector computers for the iterative solution of linear
  197. C systems. Hence, SLAP changes this input data structure to
  198. C the SLAP Column format for the iteration (but does not
  199. C change it back).
  200. C
  201. C Here is an example of the SLAP Triad storage format for a
  202. C 5x5 Matrix. Recall that the entries may appear in any order.
  203. C
  204. C 5x5 Matrix SLAP Triad format for 5x5 matrix on left.
  205. C 1 2 3 4 5 6 7 8 9 10 11
  206. C |11 12 0 0 15| A: 51 12 11 33 15 53 55 22 35 44 21
  207. C |21 22 0 0 0| IA: 5 1 1 3 1 5 5 2 3 4 2
  208. C | 0 0 33 0 35| JA: 1 2 1 3 5 3 5 2 5 4 1
  209. C | 0 0 0 44 0|
  210. C |51 0 53 0 55|
  211. C
  212. C =================== S L A P Column format ==================
  213. C
  214. C In this format the non-zeros are stored counting down
  215. C columns (except for the diagonal entry, which must appear
  216. C first in each "column") and are stored in the double pre-
  217. C cision array A. In other words, for each column in the
  218. C matrix first put the diagonal entry in A. Then put in the
  219. C other non-zero elements going down the column (except the
  220. C diagonal) in order. The IA array holds the row index for
  221. C each non-zero. The JA array holds the offsets into the IA,
  222. C A arrays for the beginning of each column. That is,
  223. C IA(JA(ICOL)),A(JA(ICOL)) are the first elements of the ICOL-
  224. C th column in IA and A, and IA(JA(ICOL+1)-1), A(JA(ICOL+1)-1)
  225. C are the last elements of the ICOL-th column. Note that we
  226. C always have JA(N+1)=NELT+1, where N is the number of columns
  227. C in the matrix and NELT is the number of non-zeros in the
  228. C matrix.
  229. C
  230. C Here is an example of the SLAP Column storage format for a
  231. C 5x5 Matrix (in the A and IA arrays '|' denotes the end of a
  232. C column):
  233. C
  234. C 5x5 Matrix SLAP Column format for 5x5 matrix on left.
  235. C 1 2 3 4 5 6 7 8 9 10 11
  236. C |11 12 0 0 15| A: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
  237. C |21 22 0 0 0| IA: 1 2 5 | 2 1 | 3 5 | 4 | 5 1 3
  238. C | 0 0 33 0 35| JA: 1 4 6 8 9 12
  239. C | 0 0 0 44 0|
  240. C |51 0 53 0 55|
  241. C
  242. C *Cautions:
  243. C This routine will attempt to write to the Fortran logical output
  244. C unit IUNIT, if IUNIT .ne. 0. Thus, the user must make sure that
  245. C this logical unit is attached to a file or terminal before calling
  246. C this routine with a non-zero value for IUNIT. This routine does
  247. C not check for the validity of a non-zero IUNIT unit number.
  248. C
  249. C***SEE ALSO DSDCGN, DSLUCN, ISDCGN
  250. C***REFERENCES 1. Mark K. Seager, A SLAP for the Masses, in
  251. C G. F. Carey, Ed., Parallel Supercomputing: Methods,
  252. C Algorithms and Applications, Wiley, 1989, pp.135-155.
  253. C***ROUTINES CALLED D1MACH, DAXPY, DCOPY, DDOT, ISDCGN
  254. C***REVISION HISTORY (YYMMDD)
  255. C 890404 DATE WRITTEN
  256. C 890404 Previous REVISION DATE
  257. C 890915 Made changes requested at July 1989 CML Meeting. (MKS)
  258. C 890921 Removed TeX from comments. (FNF)
  259. C 890922 Numerous changes to prologue to make closer to SLATEC
  260. C standard. (FNF)
  261. C 890929 Numerous changes to reduce SP/DP differences. (FNF)
  262. C 891004 Added new reference.
  263. C 910411 Prologue converted to Version 4.0 format. (BAB)
  264. C 910502 Removed MATVEC, MTTVEC and MSOLVE from ROUTINES CALLED
  265. C list. (FNF)
  266. C 920407 COMMON BLOCK renamed DSLBLK. (WRB)
  267. C 920511 Added complete declaration section. (WRB)
  268. C 920929 Corrected format of reference. (FNF)
  269. C 921019 Changed 500.0 to 500 to reduce SP/DP differences. (FNF)
  270. C 921113 Corrected C***CATEGORY line. (FNF)
  271. C***END PROLOGUE DCGN
  272. C .. Scalar Arguments ..
  273. DOUBLE PRECISION ERR, TOL
  274. INTEGER IERR, ISYM, ITER, ITMAX, ITOL, IUNIT, N, NELT
  275. C .. Array Arguments ..
  276. DOUBLE PRECISION A(N), ATDZ(N), ATP(N), ATZ(N), B(N), DZ(N), P(N),
  277. + R(N), RWORK(*), X(N), Z(N)
  278. INTEGER IA(NELT), IWORK(*), JA(NELT)
  279. C .. Subroutine Arguments ..
  280. EXTERNAL MATVEC, MSOLVE, MTTVEC
  281. C .. Local Scalars ..
  282. DOUBLE PRECISION AK, AKDEN, BK, BKDEN, BKNUM, BNRM, SOLNRM, TOLMIN
  283. INTEGER I, K
  284. C .. External Functions ..
  285. DOUBLE PRECISION D1MACH, DDOT
  286. INTEGER ISDCGN
  287. EXTERNAL D1MACH, DDOT, ISDCGN
  288. C .. External Subroutines ..
  289. EXTERNAL DAXPY, DCOPY
  290. C***FIRST EXECUTABLE STATEMENT DCGN
  291. C
  292. C Check user input.
  293. C
  294. ITER = 0
  295. IERR = 0
  296. IF( N.LT.1 ) THEN
  297. IERR = 3
  298. RETURN
  299. ENDIF
  300. TOLMIN = 500*D1MACH(3)
  301. IF( TOL.LT.TOLMIN ) THEN
  302. TOL = TOLMIN
  303. IERR = 4
  304. ENDIF
  305. C Calculate initial residual and pseudo-residual, and check
  306. C stopping criterion.
  307. CALL MATVEC(N, X, R, NELT, IA, JA, A, ISYM)
  308. DO 10 I = 1, N
  309. R(I) = B(I) - R(I)
  310. 10 CONTINUE
  311. CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
  312. CALL MTTVEC(N, Z, ATZ, NELT, IA, JA, A, ISYM)
  313. C
  314. IF( ISDCGN(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC, MSOLVE,
  315. $ ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, Z, P, ATP, ATZ,
  316. $ DZ, ATDZ, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
  317. $ GO TO 200
  318. IF( IERR.NE.0 ) RETURN
  319. C
  320. C ***** iteration loop *****
  321. C
  322. DO 100 K=1,ITMAX
  323. ITER = K
  324. C
  325. C Calculate coefficient BK and direction vector P.
  326. BKNUM = DDOT(N, Z, 1, R, 1)
  327. IF( BKNUM.LE.0.0D0 ) THEN
  328. IERR = 6
  329. RETURN
  330. ENDIF
  331. IF(ITER .EQ. 1) THEN
  332. CALL DCOPY(N, Z, 1, P, 1)
  333. ELSE
  334. BK = BKNUM/BKDEN
  335. DO 20 I = 1, N
  336. P(I) = Z(I) + BK*P(I)
  337. 20 CONTINUE
  338. ENDIF
  339. BKDEN = BKNUM
  340. C
  341. C Calculate coefficient AK, new iterate X, new residual R,
  342. C and new pseudo-residual ATZ.
  343. IF(ITER .NE. 1) CALL DAXPY(N, BK, ATP, 1, ATZ, 1)
  344. CALL DCOPY(N, ATZ, 1, ATP, 1)
  345. AKDEN = DDOT(N, ATP, 1, ATP, 1)
  346. IF( AKDEN.LE.0.0D0 ) THEN
  347. IERR = 6
  348. RETURN
  349. ENDIF
  350. AK = BKNUM/AKDEN
  351. CALL DAXPY(N, AK, ATP, 1, X, 1)
  352. CALL MATVEC(N, ATP, Z, NELT, IA, JA, A, ISYM)
  353. CALL DAXPY(N, -AK, Z, 1, R, 1)
  354. CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
  355. CALL MTTVEC(N, Z, ATZ, NELT, IA, JA, A, ISYM)
  356. C
  357. C check stopping criterion.
  358. IF( ISDCGN(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
  359. $ MSOLVE, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R,
  360. $ Z, P, ATP, ATZ, DZ, ATDZ, RWORK, IWORK, AK, BK, BNRM,
  361. $ SOLNRM) .NE. 0) GOTO 200
  362. C
  363. 100 CONTINUE
  364. C
  365. C ***** end of loop *****
  366. C
  367. C stopping criterion not satisfied.
  368. ITER = ITMAX + 1
  369. C
  370. 200 RETURN
  371. C------------- LAST LINE OF DCGN FOLLOWS ----------------------------
  372. END