dcgs.f 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. *DECK DCGS
  2. SUBROUTINE DCGS (N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE,
  3. + ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, R0, P, Q, U, V1,
  4. + V2, RWORK, IWORK)
  5. C***BEGIN PROLOGUE DCGS
  6. C***PURPOSE Preconditioned BiConjugate Gradient Squared Ax=b Solver.
  7. C Routine to solve a Non-Symmetric linear system Ax = b
  8. C using the Preconditioned BiConjugate Gradient Squared
  9. C method.
  10. C***LIBRARY SLATEC (SLAP)
  11. C***CATEGORY D2A4, D2B4
  12. C***TYPE DOUBLE PRECISION (SCGS-S, DCGS-D)
  13. C***KEYWORDS BICONJUGATE GRADIENT, ITERATIVE PRECONDITION,
  14. C NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
  15. C***AUTHOR Greenbaum, Anne, (Courant Institute)
  16. C Seager, Mark K., (LLNL)
  17. C Lawrence Livermore National Laboratory
  18. C PO BOX 808, L-60
  19. C Livermore, CA 94550 (510) 423-3141
  20. C seager@llnl.gov
  21. C***DESCRIPTION
  22. C
  23. C *Usage:
  24. C INTEGER N, NELT, IA(NELT), JA(NELT), ISYM, ITOL, ITMAX
  25. C INTEGER ITER, IERR, IUNIT, IWORK(USER DEFINED)
  26. C DOUBLE PRECISION B(N), X(N), A(NELT), TOL, ERR, R(N), R0(N), P(N)
  27. C DOUBLE PRECISION Q(N), U(N), V1(N), V2(N), RWORK(USER DEFINED)
  28. C EXTERNAL MATVEC, MSOLVE
  29. C
  30. C CALL DCGS(N, B, X, NELT, IA, JA, A, ISYM, MATVEC,
  31. C $ MSOLVE, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT,
  32. C $ R, R0, P, Q, U, V1, V2, RWORK, IWORK)
  33. C
  34. C *Arguments:
  35. C N :IN Integer
  36. C Order of the Matrix.
  37. C B :IN Double Precision B(N).
  38. C Right-hand side vector.
  39. C X :INOUT Double Precision X(N).
  40. C On input X is your initial guess for solution vector.
  41. C On output X is the final approximate solution.
  42. C NELT :IN Integer.
  43. C Number of Non-Zeros stored in A.
  44. C IA :IN Integer IA(NELT).
  45. C JA :IN Integer JA(NELT).
  46. C A :IN Double Precision A(NELT).
  47. C These arrays contain the matrix data structure for A.
  48. C It could take any form. See "Description", below,
  49. C for more details.
  50. C ISYM :IN Integer.
  51. C Flag to indicate symmetric storage format.
  52. C If ISYM=0, all non-zero entries of the matrix are stored.
  53. C If ISYM=1, the matrix is symmetric, and only the upper
  54. C or lower triangle of the matrix is stored.
  55. C MATVEC :EXT External.
  56. C Name of a routine which performs the matrix vector multiply
  57. C operation Y = A*X given A and X. The name of the MATVEC
  58. C routine must be declared external in the calling program.
  59. C The calling sequence of MATVEC is:
  60. C CALL MATVEC( N, X, Y, NELT, IA, JA, A, ISYM )
  61. C Where N is the number of unknowns, Y is the product A*X upon
  62. C return, X is an input vector. NELT, IA, JA, A and ISYM
  63. C define the SLAP matrix data structure: see Description,below.
  64. C MSOLVE :EXT External.
  65. C Name of a routine which solves a linear system MZ = R for Z
  66. C given R with the preconditioning matrix M (M is supplied via
  67. C RWORK and IWORK arrays). The name of the MSOLVE routine
  68. C must be declared external in the calling program. The
  69. C calling sequence of MSOLVE is:
  70. C CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
  71. C Where N is the number of unknowns, R is the right-hand side
  72. C vector, and Z is the solution upon return. NELT, IA, JA, A
  73. C and ISYM define the SLAP matrix data structure: see
  74. C Description, below. RWORK is a double precision array that
  75. C can be used to pass necessary preconditioning information and/
  76. C or workspace to MSOLVE. IWORK is an integer work array for
  77. C the same purpose as RWORK.
  78. C ITOL :IN Integer.
  79. C Flag to indicate type of convergence criterion.
  80. C If ITOL=1, iteration stops when the 2-norm of the residual
  81. C divided by the 2-norm of the right-hand side is less than TOL.
  82. C This routine must calculate the residual from R = A*X - B.
  83. C This is unnatural and hence expensive for this type of iter-
  84. C ative method. ITOL=2 is *STRONGLY* recommended.
  85. C If ITOL=2, iteration stops when the 2-norm of M-inv times the
  86. C residual divided by the 2-norm of M-inv times the right hand
  87. C side is less than TOL, where M-inv time a vector is the pre-
  88. C conditioning step. This is the *NATURAL* stopping for this
  89. C iterative method and is *STRONGLY* recommended.
  90. C ITOL=11 is often useful for checking and comparing different
  91. C routines. For this case, the user must supply the "exact"
  92. C solution or a very accurate approximation (one with an error
  93. C much less than TOL) through a common block,
  94. C COMMON /DSLBLK/ SOLN( )
  95. C If ITOL=11, iteration stops when the 2-norm of the difference
  96. C between the iterative approximation and the user-supplied
  97. C solution divided by the 2-norm of the user-supplied solution
  98. C is less than TOL.
  99. C TOL :INOUT Double Precision.
  100. C Convergence criterion, as described above. (Reset if IERR=4.)
  101. C ITMAX :IN Integer.
  102. C Maximum number of iterations.
  103. C ITER :OUT Integer.
  104. C Number of iterations required to reach convergence, or
  105. C ITMAX+1 if convergence criterion could not be achieved in
  106. C ITMAX iterations.
  107. C ERR :OUT Double Precision.
  108. C Error estimate of error in final approximate solution, as
  109. C defined by ITOL.
  110. C IERR :OUT Integer.
  111. C Return error flag.
  112. C IERR = 0 => All went well.
  113. C IERR = 1 => Insufficient space allocated for WORK or IWORK.
  114. C IERR = 2 => Method failed to converge in ITMAX steps.
  115. C IERR = 3 => Error in user input.
  116. C Check input values of N, ITOL.
  117. C IERR = 4 => User error tolerance set too tight.
  118. C Reset to 500*D1MACH(3). Iteration proceeded.
  119. C IERR = 5 => Breakdown of the method detected.
  120. C (r0,r) approximately 0.
  121. C IERR = 6 => Stagnation of the method detected.
  122. C (r0,v) approximately 0.
  123. C IUNIT :IN Integer.
  124. C Unit number on which to write the error at each iteration,
  125. C if this is desired for monitoring convergence. If unit
  126. C number is 0, no writing will occur.
  127. C R :WORK Double Precision R(N).
  128. C R0 :WORK Double Precision R0(N).
  129. C P :WORK Double Precision P(N).
  130. C Q :WORK Double Precision Q(N).
  131. C U :WORK Double Precision U(N).
  132. C V1 :WORK Double Precision V1(N).
  133. C V2 :WORK Double Precision V2(N).
  134. C Double Precision arrays used for workspace.
  135. C RWORK :WORK Double Precision RWORK(USER DEFINED).
  136. C Double Precision array that can be used for workspace in
  137. C MSOLVE.
  138. C IWORK :WORK Integer IWORK(USER DEFINED).
  139. C Integer array that can be used for workspace in MSOLVE.
  140. C
  141. C *Description
  142. C This routine does not care what matrix data structure is
  143. C used for A and M. It simply calls the MATVEC and MSOLVE
  144. C routines, with the arguments as described above. The user
  145. C could write any type of structure and the appropriate MATVEC
  146. C and MSOLVE routines. It is assumed that A is stored in the
  147. C IA, JA, A arrays in some fashion and that M (or INV(M)) is
  148. C stored in IWORK and RWORK in some fashion. The SLAP
  149. C routines DSDBCG and DSLUCS are examples of this procedure.
  150. C
  151. C Two examples of matrix data structures are the: 1) SLAP
  152. C Triad format and 2) SLAP Column format.
  153. C
  154. C =================== S L A P Triad format ===================
  155. C
  156. C In this format only the non-zeros are stored. They may
  157. C appear in *ANY* order. The user supplies three arrays of
  158. C length NELT, where NELT is the number of non-zeros in the
  159. C matrix: (IA(NELT), JA(NELT), A(NELT)). For each non-zero
  160. C the user puts the row and column index of that matrix
  161. C element in the IA and JA arrays. The value of the non-zero
  162. C matrix element is placed in the corresponding location of
  163. C the A array. This is an extremely easy data structure to
  164. C generate. On the other hand it is not too efficient on
  165. C vector computers for the iterative solution of linear
  166. C systems. Hence, SLAP changes this input data structure to
  167. C the SLAP Column format for the iteration (but does not
  168. C change it back).
  169. C
  170. C Here is an example of the SLAP Triad storage format for a
  171. C 5x5 Matrix. Recall that the entries may appear in any order.
  172. C
  173. C 5x5 Matrix SLAP Triad format for 5x5 matrix on left.
  174. C 1 2 3 4 5 6 7 8 9 10 11
  175. C |11 12 0 0 15| A: 51 12 11 33 15 53 55 22 35 44 21
  176. C |21 22 0 0 0| IA: 5 1 1 3 1 5 5 2 3 4 2
  177. C | 0 0 33 0 35| JA: 1 2 1 3 5 3 5 2 5 4 1
  178. C | 0 0 0 44 0|
  179. C |51 0 53 0 55|
  180. C
  181. C =================== S L A P Column format ==================
  182. C
  183. C In this format the non-zeros are stored counting down
  184. C columns (except for the diagonal entry, which must appear
  185. C first in each "column") and are stored in the double pre-
  186. C cision array A. In other words, for each column in the
  187. C matrix first put the diagonal entry in A. Then put in the
  188. C other non-zero elements going down the column (except the
  189. C diagonal) in order. The IA array holds the row index for
  190. C each non-zero. The JA array holds the offsets into the IA,
  191. C A arrays for the beginning of each column. That is,
  192. C IA(JA(ICOL)),A(JA(ICOL)) are the first elements of the ICOL-
  193. C th column in IA and A, and IA(JA(ICOL+1)-1), A(JA(ICOL+1)-1)
  194. C are the last elements of the ICOL-th column. Note that we
  195. C always have JA(N+1)=NELT+1, where N is the number of columns
  196. C in the matrix and NELT is the number of non-zeros in the
  197. C matrix.
  198. C
  199. C Here is an example of the SLAP Column storage format for a
  200. C 5x5 Matrix (in the A and IA arrays '|' denotes the end of a
  201. C column):
  202. C
  203. C 5x5 Matrix SLAP Column format for 5x5 matrix on left.
  204. C 1 2 3 4 5 6 7 8 9 10 11
  205. C |11 12 0 0 15| A: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
  206. C |21 22 0 0 0| IA: 1 2 5 | 2 1 | 3 5 | 4 | 5 1 3
  207. C | 0 0 33 0 35| JA: 1 4 6 8 9 12
  208. C | 0 0 0 44 0|
  209. C |51 0 53 0 55|
  210. C
  211. C *Cautions:
  212. C This routine will attempt to write to the Fortran logical output
  213. C unit IUNIT, if IUNIT .ne. 0. Thus, the user must make sure that
  214. C this logical unit is attached to a file or terminal before calling
  215. C this routine with a non-zero value for IUNIT. This routine does
  216. C not check for the validity of a non-zero IUNIT unit number.
  217. C
  218. C***SEE ALSO DSDCGS, DSLUCS
  219. C***REFERENCES 1. P. Sonneveld, CGS, a fast Lanczos-type solver
  220. C for nonsymmetric linear systems, Delft University
  221. C of Technology Report 84-16, Department of Mathe-
  222. C matics and Informatics, Delft, The Netherlands.
  223. C 2. E. F. Kaasschieter, The solution of non-symmetric
  224. C linear systems by biconjugate gradients or conjugate
  225. C gradients squared, Delft University of Technology
  226. C Report 86-21, Department of Mathematics and Informa-
  227. C tics, Delft, The Netherlands.
  228. C 3. Mark K. Seager, A SLAP for the Masses, in
  229. C G. F. Carey, Ed., Parallel Supercomputing: Methods,
  230. C Algorithms and Applications, Wiley, 1989, pp.135-155.
  231. C***ROUTINES CALLED D1MACH, DAXPY, DDOT, ISDCGS
  232. C***REVISION HISTORY (YYMMDD)
  233. C 890404 DATE WRITTEN
  234. C 890404 Previous REVISION DATE
  235. C 890915 Made changes requested at July 1989 CML Meeting. (MKS)
  236. C 890921 Removed TeX from comments. (FNF)
  237. C 890922 Numerous changes to prologue to make closer to SLATEC
  238. C standard. (FNF)
  239. C 890929 Numerous changes to reduce SP/DP differences. (FNF)
  240. C 891004 Added new reference.
  241. C 910411 Prologue converted to Version 4.0 format. (BAB)
  242. C 910502 Removed MATVEC and MSOLVE from ROUTINES CALLED list. (FNF)
  243. C 920407 COMMON BLOCK renamed DSLBLK. (WRB)
  244. C 920511 Added complete declaration section. (WRB)
  245. C 920929 Corrected format of references. (FNF)
  246. C 921019 Changed 500.0 to 500 to reduce SP/DP differences. (FNF)
  247. C 921113 Corrected C***CATEGORY line. (FNF)
  248. C***END PROLOGUE DCGS
  249. C .. Scalar Arguments ..
  250. DOUBLE PRECISION ERR, TOL
  251. INTEGER IERR, ISYM, ITER, ITMAX, ITOL, IUNIT, N, NELT
  252. C .. Array Arguments ..
  253. DOUBLE PRECISION A(NELT), B(N), P(N), Q(N), R(N), R0(N), RWORK(*),
  254. + U(N), V1(N), V2(N), X(N)
  255. INTEGER IA(NELT), IWORK(*), JA(NELT)
  256. C .. Subroutine Arguments ..
  257. EXTERNAL MATVEC, MSOLVE
  258. C .. Local Scalars ..
  259. DOUBLE PRECISION AK, AKM, BK, BNRM, FUZZ, RHON, RHONM1, SIGMA,
  260. + SOLNRM, TOLMIN
  261. INTEGER I, K
  262. C .. External Functions ..
  263. DOUBLE PRECISION D1MACH, DDOT
  264. INTEGER ISDCGS
  265. EXTERNAL D1MACH, DDOT, ISDCGS
  266. C .. External Subroutines ..
  267. EXTERNAL DAXPY
  268. C .. Intrinsic Functions ..
  269. INTRINSIC ABS
  270. C***FIRST EXECUTABLE STATEMENT DCGS
  271. C
  272. C Check some of the input data.
  273. C
  274. ITER = 0
  275. IERR = 0
  276. IF( N.LT.1 ) THEN
  277. IERR = 3
  278. RETURN
  279. ENDIF
  280. TOLMIN = 500*D1MACH(3)
  281. IF( TOL.LT.TOLMIN ) THEN
  282. TOL = TOLMIN
  283. IERR = 4
  284. ENDIF
  285. C
  286. C Calculate initial residual and pseudo-residual, and check
  287. C stopping criterion.
  288. CALL MATVEC(N, X, R, NELT, IA, JA, A, ISYM)
  289. DO 10 I = 1, N
  290. V1(I) = R(I) - B(I)
  291. 10 CONTINUE
  292. CALL MSOLVE(N, V1, R, NELT, IA, JA, A, ISYM, RWORK, IWORK)
  293. C
  294. IF( ISDCGS(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE,
  295. $ ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, R0, P, Q,
  296. $ U, V1, V2, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
  297. $ GO TO 200
  298. IF( IERR.NE.0 ) RETURN
  299. C
  300. C Set initial values.
  301. C
  302. FUZZ = D1MACH(3)**2
  303. DO 20 I = 1, N
  304. R0(I) = R(I)
  305. 20 CONTINUE
  306. RHONM1 = 1
  307. C
  308. C ***** ITERATION LOOP *****
  309. C
  310. DO 100 K=1,ITMAX
  311. ITER = K
  312. C
  313. C Calculate coefficient BK and direction vectors U, V and P.
  314. RHON = DDOT(N, R0, 1, R, 1)
  315. IF( ABS(RHONM1).LT.FUZZ ) GOTO 998
  316. BK = RHON/RHONM1
  317. IF( ITER.EQ.1 ) THEN
  318. DO 30 I = 1, N
  319. U(I) = R(I)
  320. P(I) = R(I)
  321. 30 CONTINUE
  322. ELSE
  323. DO 40 I = 1, N
  324. U(I) = R(I) + BK*Q(I)
  325. V1(I) = Q(I) + BK*P(I)
  326. 40 CONTINUE
  327. DO 50 I = 1, N
  328. P(I) = U(I) + BK*V1(I)
  329. 50 CONTINUE
  330. ENDIF
  331. C
  332. C Calculate coefficient AK, new iterate X, Q
  333. CALL MATVEC(N, P, V2, NELT, IA, JA, A, ISYM)
  334. CALL MSOLVE(N, V2, V1, NELT, IA, JA, A, ISYM, RWORK, IWORK)
  335. SIGMA = DDOT(N, R0, 1, V1, 1)
  336. IF( ABS(SIGMA).LT.FUZZ ) GOTO 999
  337. AK = RHON/SIGMA
  338. AKM = -AK
  339. DO 60 I = 1, N
  340. Q(I) = U(I) + AKM*V1(I)
  341. 60 CONTINUE
  342. DO 70 I = 1, N
  343. V1(I) = U(I) + Q(I)
  344. 70 CONTINUE
  345. C X = X - ak*V1.
  346. CALL DAXPY( N, AKM, V1, 1, X, 1 )
  347. C -1
  348. C R = R - ak*M *A*V1
  349. CALL MATVEC(N, V1, V2, NELT, IA, JA, A, ISYM)
  350. CALL MSOLVE(N, V2, V1, NELT, IA, JA, A, ISYM, RWORK, IWORK)
  351. CALL DAXPY( N, AKM, V1, 1, R, 1 )
  352. C
  353. C check stopping criterion.
  354. IF( ISDCGS(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE,
  355. $ ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, R0, P, Q,
  356. $ U, V1, V2, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
  357. $ GO TO 200
  358. C
  359. C Update RHO.
  360. RHONM1 = RHON
  361. 100 CONTINUE
  362. C
  363. C ***** end of loop *****
  364. C Stopping criterion not satisfied.
  365. ITER = ITMAX + 1
  366. IERR = 2
  367. 200 RETURN
  368. C
  369. C Breakdown of method detected.
  370. 998 IERR = 5
  371. RETURN
  372. C
  373. C Stagnation of method detected.
  374. 999 IERR = 6
  375. RETURN
  376. C------------- LAST LINE OF DCGS FOLLOWS ----------------------------
  377. END