-
Notifications
You must be signed in to change notification settings - Fork 0
/
hsl_ma87d.f90
5402 lines (4537 loc) · 184 KB
/
hsl_ma87d.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
! COPYRIGHT (c) 2009 Science and Technology Facilities Council
! Original date 27 April 2009, Version 1.0.0
!
! Written by: Jonathan Hogg, John Reid and Jennifer Scott
! Version 2.4.0
! See ChangeLog for version history
!
! To convert from double:
! * Change wp
! * Change _double
! * Change BLAS calls: dgemv, dgemm, dsyrk, dtrsm, dtrsv
! * Change LAPACK calls: dpotrf
!
module hsl_MA87_double
!$ use omp_lib
use hsl_mc78_integer
use hsl_mc34_double
implicit none
private
public :: ma87_keep, ma87_control, ma87_info
public :: ma87_analyse, ma87_factor, ma87_factor_solve, ma87_solve, &
ma87_sparse_fwd_solve, ma87_finalise
public :: ma87_get_n__
! Parameters
! Data kinds
integer, parameter :: wp = kind(0d0)
integer, parameter :: long = selected_int_kind(18)
! Numerical constants
real(wp), parameter :: one = 1.0_wp
real(wp), parameter :: zero = 0.0_wp
! Default values
integer, parameter :: nemin_default = 32
! node amalgamation parameter
integer, parameter :: nb_default = 256
! Block size with dense kernel
integer, parameter :: pool_default = 25000
! size of task pool
! Symbolic constants
! These flag the different tasks within factor and solve
integer, parameter :: TASK_DONE = -1
integer, parameter :: TASK_NONE = 0
integer, parameter :: TASK_FACTORIZE_BLOCK = 1
integer, parameter :: TASK_UPDATE_INTERNAL = 3
integer, parameter :: TASK_UPDATE_BETWEEN = 4
integer, parameter :: TASK_SOLVE_BLOCK = 5
integer, parameter :: TASK_SLV_FSLV = 6
! Fwds solve on diag block
integer, parameter :: TASK_SLV_FUPD = 7
! Fwds update in solve
integer, parameter :: TASK_SLV_BSLV = 8
! Bwds solve on diag block
integer, parameter :: TASK_SLV_BUPD = 9
! Bwds update in solve
! Types of solve job
integer, parameter :: SOLVE_JOB_ALL = 0
integer, parameter :: SOLVE_JOB_FWD = 1
integer, parameter :: SOLVE_JOB_BWD = 2
! How processors share cache Example
integer, parameter :: CACHE_COMPACT = 1
! [0,1], [2,3], [4,5], [6,7]
integer, parameter :: CACHE_SCATTER = 2
! [0,4]. [1,5], [2,6], [3,7]
integer, parameter :: CACHE_IDENTITY = 3
! 0, 1, 2, 3, 4, 5, 6, 7
! Error flags
integer, parameter :: MA87_SUCCESS = 0
integer, parameter :: MA87_ERROR_ALLOCATION = -1
integer, parameter :: MA87_ERROR_ORDER = -2
integer, parameter :: MA87_ERROR_NOT_POSDEF = -3
integer, parameter :: MA87_ERROR_X_SIZE = -4
integer, parameter :: MA87_ERROR_INFINITY = -5
integer, parameter :: MA87_ERROR_JOB_OOR = -6
integer, parameter :: MA87_ERROR_NBI_OOR = -7
integer, parameter :: MA87_ERROR_UNKNOWN = -99
! warning flags
integer, parameter :: MA87_WARNING_POOL_SMALL = 1
integer, parameter :: MA87_WARNING_SEMI_DEFINITE = 2
!*************************************************
interface MA87_analyse
module procedure MA87_analyse_double
end interface
interface MA87_factor
module procedure MA87_factor_double
end interface
interface MA87_factor_solve
module procedure MA87_factor_solve_one_double, &
MA87_factor_solve_mult_double
end interface
interface MA87_solve
module procedure MA87_solve_one_double, MA87_solve_mult_double
end interface
interface MA87_sparse_fwd_solve
module procedure MA87_sparse_fwd_solve_double
end interface
interface MA87_finalise
module procedure MA87_finalise_double
end interface
interface ma87_get_n__
module procedure ma87_get_n_double
end interface ma87_get_n__
!*************************************************
! Data type for storing information for each block (BLK)
! The blocks are numbered 1,2,..., keep%final_blk
type block_type
! Static info, which is set in ma87_analayse
integer :: bcol ! block column that blk belongs to
integer :: blkm ! height of block (number of rows in blk)
integer :: blkn ! width of block (number of columns in blk)
integer(long) :: dblk ! id of the block on the diagonal within the
! block column to which blk belongs
integer :: dep_initial ! initial dependency count for block,
integer(long) :: id ! The block identitifier (ie, its number blk)
integer(long) :: last_blk ! id of the last block within the
! block column to which blk belongs
integer :: node ! node to which blk belongs
integer :: sa ! posn of the first entry of the
! block blk within the array that holds the block column of L
! that blk belongs to.
! Non-static info
integer :: dep ! dependency countdown/marker. Once factor or solve done,
! value is -2.
!$ integer(omp_lock_kind) :: lock ! Lock for altering dep
!$ integer(omp_lock_kind) :: alock ! Lock for altering values in keep%lfact
! for this block.
! Note: locks initialised in ma87_analyse and destroyed
! in ma87_finalise
end type block_type
!*************************************************
! Derived type for holding data for each node.
! This information is set up by ma87_analyse once the assembly tree
! has been constructed.
type node_type
integer(long) :: blk_sa ! identifier of the first block in node
integer(long) :: blk_en ! identifier of the last block in node
integer :: nb ! Block size for nodal matrix
! If number of cols nc in nodal matrix is less than control%nb but
! number of rows is large, the block size for the node is taken as
! control%nb**2/nc, rounded up to a multiple of 8. The aim is for
! the numbers of entries in the blocks to be similar to those in the
! normal case.
integer :: sa ! index (in pivotal order) of the first column of the node
integer :: en ! index (in pivotal order) of the last column of the node
integer, allocatable :: index(:) ! holds the permuted variable
! list for node. They are sorted into increasing order.
! index is set up by ma87_analyse
integer :: nchild ! number of children node has in assembly tree
integer, allocatable :: child(:) ! holds children of node
integer :: parent ! Parent of node in assembly tree
integer :: least_desc ! Least descendant in assembly tree
end type node_type
!*************************************************
! Data type that represents a single block column in L
! (allocated by ma87_analyse)
type lfactor
real(wp), dimension(:), allocatable :: lcol ! holds block column
end type lfactor
!*************************************************
! Data type for storing mapping from user's matrix values into
! block columns of L
type lmap_type
integer(long) :: len_map ! length of map
integer(long), allocatable :: map(:,:) ! holds map from user's val
! array into lfact(:)%lcol values as follows:
! lcol( map(1,i) ) += val( map(2,i) ) i = 1:lmap
! map is set at end of analyse phase using subroutines make_map
! and lcol_map, and is used in factorization phase by blk_col_add_a
end type lmap_type
!*************************************************
! Data type that contains counts and locks for the solve
! (one per a block column)
type slv_count_type
integer :: dep
integer(long) :: dblk
!$ integer(kind=omp_lock_kind) :: lock
end type slv_count_type
!*************************************************
! Data type for user controls
type MA87_control
integer :: diagnostics_level = 0 ! Controls diagnostic printing.
! Possible values are:
! < 0: no printing.
! 0: error and warning messages only.
! 1: as 0 plus basic diagnostic printing.
! 2: as 1 plus some more detailed diagnostic messages.
! 3: as 2 plus all entries of user-supplied arrays.
integer :: nb = nb_default ! Controls the size of the
! blocks used within each node (used to set nb within node_type)
integer :: nemin = nemin_default
! Node amalgamation parameter. A child node is merged with its parent
! if they both involve fewer than nemin eliminations.
integer :: pool_size = pool_default ! Size of task pool arrays
integer :: unit_diagnostics = 6 ! unit for diagnostic messages
! Printing is suppressed if unit_diagnostics < 0.
integer :: unit_error = 6 ! unit for error messages
! Printing is suppressed if unit_error < 0.
integer :: unit_warning = 6 ! unit for warning messages
! Printing is suppressed if unit_warning < 0.
!!!! Undocumented
!** integer :: time_out = -1 ! If >= 0 some internal timings
!** are printed on unit time_out. For HSL 2011 these are commented
!** using comments !** so easy to search and uncomment
!%%% integer :: unit_log = -1 ! For profiling log output
!%%% commented out for HSL 2011 using !%%%
!%%% integer :: log_level = 1 ! Level of profiling information
!!! Note: commenting out use of time_out and unit_log means
!%%% commented out for HSL 2011 using !%%%
!!! that some subroutines have unused dummy arguments that
!!! give warnings at compile time. We have not removed them
!!! since uncommenting the above controls would then be more tedious.
integer :: cache_tq_sz = 100 ! Size of local task stack
integer :: cache_layout = CACHE_COMPACT ! Proc <=> cache mapping
integer :: cache_cores = 2 ! Number of cores per cache
integer :: min_width_blas = 8 ! Minimum width of source block
! before we use an indirect update_between
! There are three cases for diagonal entries d_ii:
! diag_zero_plus < d_ii +ive eigenvalue
! diag_zero_minus < d_ii <= diag_zero_plus zero eigenvalue
! d_ii < min(diag_zero_minus, diag_zero_plus) -ive eigenvalue
! Traditional LAPACK potrf() corresponds to
! diag_zero_plus = diag_zero_minus = 0.0
real(wp) :: diag_zero_plus = 0.0
real(wp) :: diag_zero_minus = 0.0
end type MA87_control
!*************************************************
! data type for returning information to user.
type MA87_info
real(wp) :: detlog = 0 ! Holds logarithm of abs det A (or 0)
integer :: flag = 0 ! Error return flag (0 on success)
integer :: maxdepth = 0 ! Maximum depth of the tree.
integer(long) :: num_factor = 0_long ! Number of entries in the factor.
integer(long) :: num_flops = 0_long ! Number of flops for factor.
integer :: num_nodes = 0 ! Number of nodes
integer :: pool_size = pool_default ! Maximum size of task pool used
integer :: stat = 0 ! STAT value on error return -1.
integer :: num_zero = 0 ! Num pivots in range
! (diag_zero_minus, diag_zero_plus]
end type MA87_info
!*************************************************
! Data type for communication between threads and routines
type ma87_keep
private
type(block_type), dimension(:), allocatable :: blocks ! block info
integer, dimension(:), allocatable :: flag_array ! allocated to
! have size equal to the number of threads. For each thread, holds
! error flag
integer(long) :: final_blk = 0 ! Number of blocks. Used for destroying
! locks in finalise
type(ma87_info) :: info ! Holds copy of info
integer :: maxmn ! holds largest block dimension
integer :: n ! Order of the system.
type(node_type), dimension(:), allocatable :: nodes ! nodal info
integer :: nbcol = 0 ! number of block columns in L
type(lfactor), dimension(:), allocatable :: lfact
! holds block cols of L
type(lmap_type), dimension(:), allocatable :: lmap
! holds mapping from matrix values into lfact
logical, dimension(:), allocatable :: zero_flag
! true if variable i was in range (diag_zero_minus, diag_zero_plus]
! upon pivoting
end type ma87_keep
!*************************************************
! Data type for a task
type dagtask
integer :: task_type ! One of TASK_FACTORIZE_BLOCK, ...
integer(long) :: dest ! id of the target (destination) block
integer(long) :: src1 !
! if task_type = TASK_UPDATE_INTERNAL, src1 holds the id of the first
! source block
! if task_type = TASK_UPDATE_BETWEEN, src1 holds the id of a block
! in the block column of the source node that is used
! in updating dest.
integer(long) :: src2
! if task_type = TASK_UPDATE_INTERNAL, src2 holds the id of the second
! source block
! (src1 and src2 are blocks belonging to the same block column
! of the source node with src1 .le. src2)
! src2 is not used by the other tasks
integer :: csrc(2)
integer :: rsrc(2)
! for an UPDATE_BETWEEN task, we need to hold some additional
! information, which locates the source blocks rsrc and csrc
! within the source block col.
! This info. is set up the subroutine add_between_updates
end type dagtask
!*************************************************
! Data type for storing tasks we need to do.
! Task pool is held as a collection of 4 stacks for different priorities
! using the same space.
! Each stack is a linked list with its head given by an element of prihead.
! There are also local stacks for each cache.
type taskstack
integer :: max_pool_size = 0 ! max. number of tasks that are in
! the task pool at any one time during the factorization.
logical :: abort = .false. ! true if we have aborted
integer :: active ! Number of active threads
! (number of tasks in execution)
type(dagtask), dimension(:,:), allocatable :: ctasks ! local task stacks.
! allocated to have size (control%cache_tq_sz, ncache), where
! ncache is number of caches
integer, dimension(:), allocatable :: cheads ! Heads for local stacks.
! allocated to have size equal to number of caches
!$ integer(omp_lock_kind), dimension(:), allocatable :: clocks
! Locks for local stacks.
integer :: freehead ! Holds the head of linked list of
! entries in the task pool that are free
!$ integer(omp_lock_kind) :: lock ! lock so only one thread at a time
! can read/alter the task pool
integer :: lowest_priority_value = huge(0) !
! lowest priority value of the tasks in the pool.
! The priority value for each of the different types of task is
! 1. factor Highest priority
! 2. solve
! 3. update_internal
! 4. update_between Lowest priority
integer, dimension(:), allocatable :: next ! next task in linked list.
! allocated to have size pool_size. Reallocated if initial setting
! for pool_size found to be too small.
integer :: pool_size ! sizes of task pool arrays next and tasks.
! Initialised to control%pool_size
integer :: prihead(4) ! Holds the heads of the linked lists for tasks
! with priority values 1,2,3,4.
type(dagtask), dimension(:), allocatable :: tasks ! Holds tasks.
! allocated to have size pool_size. Reallocated if initial setting
! for pool_size found to be too small.
integer :: total ! Total number of tasks in pool
!** real, dimension(:), allocatable :: waiting ! Allocated to have size
!** ! equal to the number of threads. Used to hold times the threads spent
!** ! waiting if control%time_out >= 0
end type taskstack
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
contains
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
! Analyse phase.
! The user inputs the pivot order and lower
! triangular parts of A. Structure is expanded.
! Supervariables are computed
! and then the assembly tree is constructed and the data structures
! required by the factorization are set up.
! There is no checking of the user's data.
!
subroutine MA87_analyse_double(n, ptr, row, order, keep, control, info)
integer, intent(in) :: n ! order of A
integer, intent(in) :: row(:) ! row indices of lower triangular part
integer, intent(in) :: ptr(:) ! col pointers for lower triangular part
integer, intent(inout), dimension(:) :: order
! order(i) must hold position of i in the pivot sequence.
! On exit, holds the pivot order to be used by MA87_factor.
! For details of keep, control, info : see derived type descriptions
type(MA87_keep), intent(out) :: keep
type(MA87_control), intent(in) :: control
type(MA87_info), intent(inout) :: info
! Local arrays
integer, allocatable :: amap(:) ! map from user a to reordered a
integer, allocatable :: aptr(:) ! column pointers of expanded matrix
integer, allocatable :: arow(:) ! row pointers of expanded matrix
integer, allocatable :: count(:) ! used for depth
! first search of tree to keep track of level we are at
integer, allocatable :: cnode(:) ! used for depth
! first search of tree
integer, allocatable :: iw(:) ! work array
integer, allocatable :: lsize(:) ! lsize(ie) holds the
! length of the list of variables associated with node ie.
integer, allocatable :: map(:) ! Allocated to have size n.
! used in computing dependency counts. For each row k in
! j-th block column of a node, map(k1) is set to j
integer, allocatable :: perm(:) ! inverse permutation.
! perm(i) holds the variable that is i-th in the pivot sequence.
! Also used for checking user-supplied permutation.
integer, allocatable :: roots(:) ! Holds roots of the forest.
integer, allocatable :: node_map(:) ! used to number nodes
! contiguously after construction of tree is complete
! Local scalars.
integer :: a_nb ! block size of anode
integer :: anode ! ancestoral node of snode in tree
integer(long) :: blk ! temporary variable for holding block number
integer :: blkn ! number of columns within block column
integer :: cptr ! pointer into rows of snode that correspond
! to columns of an ancestor anode
integer :: cb ! index of block column within node
integer :: col_used ! used in computing number of cols in block col.
integer :: ci ! do loop variable. current block col.
integer(long) :: dblk ! diagonal block within block column
integer :: en ! holds keep%nodes(snode)%en
integer :: i ! temporary variable
integer(long) :: ii ! temporary variable
integer :: j ! temporary variable
integer :: jj ! temporary variable
integer :: jb ! block index in anode
integer :: k
integer :: k1 ! temporary variable
integer :: l_nb ! set to block size of snode (keep%nodes(snode)%nb)
integer :: mp ! copy of control%unit_diagnostics
integer :: ne ! set to ptr(n+1) - 1
integer :: nemin ! min. number of eliminations (see control%nemin)
integer :: node ! a node in tree
integer :: num_nodes ! number of nodes in tree
integer :: numcol ! number of cols in node (en-sa+1)
integer :: numrow ! number of rows in a block column
integer :: row_used ! used in computing number of rows in a block.
integer :: sa ! holds keep%nodes(snode)%sa
integer :: size_anode ! size(keep%nodes(anode)%index)
integer :: st ! stat parameter
integer :: sz ! number of blocks in a block column of node
integer :: swidth ! number of block columns in node
!** integer :: t_start, t_end, t_rate
type(mc78_control) :: control78
integer :: par
integer :: info78
integer, dimension(:), allocatable :: sptr, sparent, rlist
integer(long), dimension(:), allocatable :: rptr
! Possible error returns:
! MA87_ERROR_ALLOCATION Allocation error
! MA87_ERROR_ORDER Error in order
! initialise
info%flag = 0
info%num_factor = 0_long
info%num_flops = 0_long
info%num_nodes = 0
info%maxdepth = 0
info%stat = 0
keep%n = n
ne = ptr(n+1) - 1
! Perform appropriate printing
mp = control%unit_diagnostics
if (control%diagnostics_level>=1 .and. mp>=0) then
write (mp,'(/a)') ' On entry to MA87_analyse:'
write (mp,'(a,i15)') ' control%diagnostics_level = ', &
control%diagnostics_level
write (mp,'(a,i15)') ' control%unit_diagnostics = ',mp
write (mp,'(a,i15)') ' control%unit_error = ',control%unit_error
write (mp,'(a,i15)') ' control%unit_warning = ',control%unit_warning
write (mp,'(a,i15)') ' control%nemin = ',control%nemin
write (mp,'(a,i15)') ' control%nb = ',control%nb
write (mp,'(a,i15)') ' n = ',n
end if
if (control%diagnostics_level > 2 .and. mp>=0) then
write (mp,'(a)') ' ptr = '
write (mp,'(5i15)') ptr(1:n+1)
write (mp,'(a)') ' row = '
write (mp,'(5i15)') row(1:ne)
! Print out pivot order.
write (mp,'(a)') ' User-supplied elimination order :'
i = min(size(order),n)
write (mp,'(5i15)') order(1:i)
else if (control%diagnostics_level==2 .and. mp>=0) then
write (mp,'(a)') ' ptr(1:min(5,n+1)) = '
write (mp,'(5i15)') ptr(1:min(5,n+1))
write (mp,'(a)') ' row(1:min(5,ne)) = '
write (mp,'(5i15)') row(1:min(5,ne))
i = min(10,n)
i = min(size(order),i)
write (mp,'(a,2(/5i12))') &
' User-supplied elimination order :', order(1:i)
if (i < n) write (mp,'(a)') ' . . . . . .'
end if
! immediate return if n = 0
if (n == 0) return
! expand the matrix
! allocate space for expanded matrix (held in aptr,arow)
allocate (arow(2*ne-n),aptr(n+3),iw(n+1),amap(ptr(n+1)-1),stat=st)
if (st /= 0) go to 490
arow(1:ne) = row(1:ne)
aptr(1:n+1) = ptr(1:n+1)
call mc34_expand(n, arow, aptr, iw)
deallocate(iw,stat=st)
nemin = control%nemin
! Check nemin (a node is merged with its parent if both involve
! fewer than nemin eliminations). If out of range, use the default
if (nemin < 1) nemin = nemin_default
! Check the user-supplied array order and set the inverse in perm.
if (size(order).lt.n) then
info%flag = MA87_ERROR_ORDER
call MA87_print_flag(info%flag, control, context='MA87_analyse')
go to 500
end if
deallocate (perm,stat=st)
allocate (perm(n),stat=st)
if (st /= 0) go to 490
perm(:) = 0
k1 = 0
do i = 1, n
jj = order(i)
if (jj < 1 .or. jj > n) exit
if (perm(jj) /= 0) exit ! Duplicate found
perm(jj) = i
end do
if (i-1 /= n) then
info%flag = MA87_ERROR_ORDER
call MA87_print_flag(info%flag, control, context='MA87_analyse')
go to 500
end if
control78%nemin = nemin
control78%sort = .true.
control78%lopt = .true.
call mc78_analyse(n, aptr, arow, order, num_nodes, &
sptr, sparent, rptr, rlist, control78, info78, nfact=info%num_factor, &
nflops=info%num_flops)
info%num_nodes = num_nodes
!**************************************
! Set up nodal data structures
! For each node, hold data in keep%nodes(node)
deallocate(keep%nodes, stat=st)
allocate(keep%nodes(-1:num_nodes),stat=st)
if (st /= 0) go to 490
keep%nodes(0)%blk_en = 0
keep%nodes(1)%blk_sa = 1
keep%nodes(1)%sa = 1
! loop over root nodes
keep%nodes(:)%nchild = 0
do node = 1, num_nodes
keep%nodes(node)%sa = sptr(node)
keep%nodes(node)%en = sptr(node+1)-1
par = sparent(node)
keep%nodes(node)%parent = par
if(par .le. num_nodes) then
keep%nodes(par)%nchild = keep%nodes(par)%nchild + 1
else
keep%nodes(node)%parent = -1
endif
! determine and record the block size for node
! note we are careful in case l_nb**2 overflows (in fact 1+l_nb must
! not overflow at the end), and limit the answer to huge(l_nb)/2
l_nb = control%nb
if (l_nb < 1) l_nb = nb_default
l_nb = int(min(huge(l_nb)/2_long, &
(l_nb**2_long) / min(sptr(node+1)-sptr(node), l_nb) ))
l_nb = (l_nb-1) / 8 + 1
l_nb = 8 * l_nb
keep%nodes(node)%nb = l_nb
! Copy row list into keep%nodes
allocate(keep%nodes(node)%index(rptr(node+1)-rptr(node)),stat=st)
if (st /= 0) go to 490
j = 1
do ii = rptr(node), rptr(node+1)-1
keep%nodes(node)%index(j) = rlist(ii)
j = j + 1
end do
! Allocate space to store child nodes
allocate(keep%nodes(node)%child(keep%nodes(node)%nchild), stat=st)
if(st.ne.0) goto 490
! Calculate number j of blocks in node and set
! keep%nodes(node)%blk_en
sz = int(rptr(node+1)-rptr(node) - 1) / l_nb + 1
j = 0
do i = keep%nodes(node)%sa, keep%nodes(node)%en, l_nb
j = j + sz
sz = sz - 1
end do
keep%nodes(node)%blk_en = keep%nodes(node-1)%blk_en + j
! if node is not the final node, record first block
! for the next node (node+1)
if (node < num_nodes) &
keep%nodes(node+1)%blk_sa = keep%nodes(node)%blk_en + 1
end do
! set keep%final_blk to hold total number of blocks.
keep%final_blk = keep%nodes(num_nodes)%blk_en
! Add children to nodes, use sptr as a counter as it has fufilled its purpose
sptr(:) = 0
do node = 1, num_nodes
par = sparent(node)
if(par.gt.num_nodes) cycle
sptr(par) = sptr(par) + 1
keep%nodes(par)%child(sptr(par)) = node
end do
! Setup least descendants, to allow easy walk of subtrees
do node = -1, num_nodes
! initialise least descendat to self
keep%nodes(node)%least_desc = node
end do
do node = 1, num_nodes
! walk up tree from leaves. A parent's least descendant is either this
! nodes least descendant (itself in case of a leaf), or its existing
! one if that is smaller.
anode = keep%nodes(node)%parent
keep%nodes(anode)%least_desc = &
min(keep%nodes(node)%least_desc, keep%nodes(anode)%least_desc)
end do
!**************************************
! Fill out block information.
!** call system_clock(t_start)
deallocate(keep%blocks,stat=st)
allocate(keep%blocks(keep%final_blk),stat=st)
if(st.ne.0) go to 490
! Loop over the nodes. Number the blocks in the first node
! contiguously, then those in the second node, and so on.
! Each node has a number of block columns; the blocks within
! each block column are numbered contiguously.
! Also add up the number of block columns and store largest block dimension.
blk = 1
keep%nbcol = 0
keep%maxmn = 0
do node = 1, num_nodes
sa = keep%nodes(node)%sa
en = keep%nodes(node)%en
numcol = en - sa + 1
numrow = size(keep%nodes(node)%index)
! l_nb is the size of the blocks
l_nb = keep%nodes(node)%nb
! sz is number of blocks in the current block column
sz = (numrow - 1) / l_nb + 1
! cb is the index of the block col. within node
cb = 0
col_used = 0
! Loop over the block columns in node.
do ci = sa, en, l_nb
k = 1 ! use k to hold position of block within block column
! increment count of block columns
keep%nbcol = keep%nbcol + 1
cb = cb + 1
! blkn is the number of columns in the block column.
! For all but the last block col. blkn = l_nb.
blkn = min(l_nb, numcol-col_used)
col_used = col_used + blkn
dblk = blk
! loop over the row blocks (that is, loop over blocks in block col)
row_used = 0
do blk = dblk, dblk+sz-1
! store identity of block
keep%blocks(blk)%id = blk
! store number of rows in the block.
! For all but the last block, the number of rows is l_nb
keep%blocks(blk)%blkm = min(l_nb, numrow-row_used)
row_used = row_used + keep%blocks(blk)%blkm
! store number of columns in the block.
keep%blocks(blk)%blkn = blkn
keep%maxmn = max(keep%maxmn, &
keep%blocks(blk)%blkm, keep%blocks(blk)%blkn)
! store position of the first entry of the block within the
! block column of L
keep%blocks(blk)%sa = k
! store identity of diagonal block within current block column
keep%blocks(blk)%dblk = dblk
! store identity of last block within current block column
keep%blocks(blk)%last_blk = dblk + sz - 1
! store node the blk belongs to
keep%blocks(blk)%node = node
! initialise dependency count
keep%blocks(blk)%dep_initial = cb
! store identity of block column that blk belongs to
keep%blocks(blk)%bcol = keep%nbcol
!$ call omp_init_lock(keep%blocks(blk)%lock)
!$ call omp_init_lock(keep%blocks(blk)%alock)
! increment k by number of entries in block
k = k + keep%blocks(blk)%blkm * keep%blocks(blk)%blkn
end do
! Diagonal block has no dependency for factor(dblk)
keep%blocks(dblk)%dep_initial = cb - 1
! decrement number of row blocks and rows in next block column
sz = sz - 1
numrow = numrow - l_nb
end do
end do
!** if(control%time_out.ge.0) then
!** call system_clock(t_end, t_rate)
!** write(control%time_out,"(a,es12.4)") "fill block took ", &
!** (t_end - t_start) / real(t_rate)
!** end if
!
! Compute dependency counts
! Note: This might be more efficient if implemented in left-looking
! (all descendants) rather than right-looking (all ancestors) fashion.
!
!** call system_clock(t_start)
allocate (map(n),stat=st)
if(st.ne.0) go to 490
! loop over nodes
! FIXME: this loop can be particuarly expensive, and should perhaps be
! reordered so the map biulding loop only occurs once for each node.
! (eg PARSEC/Ga41As41H72 is particularly bad)
do node = 1, num_nodes
! anode is initially the parent of node. Later it will be the
! grandparent, then great grandparent as we move up the tree to the root
anode = keep%nodes(node)%parent
! numcol is number of columns in node
numcol = keep%nodes(node)%en - keep%nodes(node)%sa + 1
! initialise cptr
cptr = 1 + numcol
! set swidth to number of block columns in node
l_nb = keep%nodes(node)%nb
swidth = (numcol-1)/l_nb + 1
! loop over ancestors of node
do while(anode.gt.0)
! if we have finished with node, move to next node
if(cptr.gt.size(keep%nodes(node)%index)) exit
! If we have skipped an anode (eg if its only a parent because of
! other nodes in the subtree) we skip the current anode
if(keep%nodes(node)%index(cptr).gt.keep%nodes(anode)%en) then
anode = keep%nodes(anode)%parent
cycle
endif
! Build a map of anode's blocks.
! Within the matrix for anode, the block columns are numbered
! 1,2,3... For each row k1 in jb-th block column,
! map(k1) is set to jb.
a_nb = keep%nodes(anode)%nb
jb = 1 ! Block
! loop over the block columns in anode
size_anode = size(keep%nodes(anode)%index)
do i = 1, size_anode, a_nb
! loop over the rows in the block column
do k = i, min(i+a_nb-1, size_anode)
k1 = keep%nodes(anode)%index(k)
map(k1) = jb
end do
jb = jb + 1
end do
!print *, " map = ", map
! Loop over affected block columns
call calc_dep(cptr, node, anode, keep%nodes, keep%blocks, &
swidth, map)
! Move up the tree to the parent of anode
anode = keep%nodes(anode)%parent
end do
end do
! Note: following two functions could probably be combined with mc34 call
! above, but have been left as is to ease maintenance
allocate(keep%lmap(keep%nbcol),stat=st)
if(st.ne.0) go to 490
call make_map(n, order, ptr, row, aptr, arow, amap)
call lcol_map(aptr, arow, num_nodes, keep%nodes, keep%blocks, &
keep%lmap, map, amap, st)
if(st.ne.0) goto 490
!** if(control%time_out.ge.0) then
!** call system_clock(t_end)
!** write(control%time_out,"(a,es12.4)") &
!** "calculating initial dependencies took ", &
!** (t_end - t_start) / real(t_rate)
!** end if
if (mp < 0) go to 500
if (control%diagnostics_level >= 1) then
write (mp,'(/a)') ' Leaving MA87_analyse with:'
write (mp,'(a,i15)') ' flag = ',info%flag
write (mp,'(a,i15)') ' maxdepth = ',info%maxdepth
write (mp,'(a,es15.5)') ' num_factor = ',real(info%num_factor)
write (mp,'(a,i15)') ' num_nodes = ',info%num_nodes
write (mp,'(a,i15)') ' stat = ',info%stat
end if
if (control%diagnostics_level>2) then
! Print out pivot order.
write (mp,'(a)') ' On exit, elimination order :'
write (mp,'(5i15)') order(1:n)
else if (control%diagnostics_level==2) then
i = min(10,n)
write (mp,'(a,2(/5i12))') &
' On exit, elimination order :', order(1:i)
if (i < n) write (mp,'(a)') ' . . . . . .'
end if
go to 500
490 info%flag = MA87_ERROR_ALLOCATION
info%stat = st
call MA87_print_flag(info%flag, control, context='MA87_analyse',st=st)
500 continue
! before returning take copy of components of info set by MA87_analyse
keep%info%flag = info%flag
keep%info%num_factor = info%num_factor
keep%info%num_flops = info%num_flops
keep%info%num_nodes = info%num_nodes
keep%info%maxdepth = info%maxdepth
keep%info%stat = info%stat
deallocate (arow,stat=st)
deallocate (aptr,stat=st)
deallocate (amap,stat=st)
deallocate (count,stat=st)
deallocate (cnode,stat=st)
deallocate (iw,stat=st)
deallocate (lsize,stat=st)
deallocate (map,stat=st)
deallocate (perm,stat=st)
deallocate (roots,stat=st)
deallocate (node_map,stat=st)
end subroutine MA87_analyse_double
! Make a map from original A to reordered half matrix A
! The reordered half matrix's pattern is returned in nptr and nrow
subroutine make_map(n, perm, optr, orow, nptr, nrow, map)
integer, intent(in) :: n
integer, dimension(n), intent(in) :: perm
integer, dimension(n+1), intent(in) :: optr
integer, dimension(optr(n+1)-1), intent(in) :: orow
integer, dimension(n+3), intent(out) :: nptr ! extra space used for tricks
integer, dimension(optr(n+1)-1), intent(out) :: nrow
integer, dimension(optr(n+1)-1), intent(out) :: map
integer :: i, k, l
integer :: j
nptr(:) = 0
! Count number of entries in each column of new matrix (at offset 2)
do i = 1, n
l = perm(i)
do j = optr(i), optr(i+1)-1
k = perm(orow(j))
if(k<l) then
nptr(k+2) = nptr(k+2) + 1
else
nptr(l+2) = nptr(l+2) + 1
endif
end do
end do
! Calculate column starts (at offset 1)
nptr(1:2) = 1
do i = 2, n
nptr(i+1) = nptr(i) + nptr(i+1)
end do
! Now build map
do i = 1, n
l = perm(i)
do j = optr(i), optr(i+1)-1
k = perm(orow(j))
if(k<l) then
map(nptr(k+1)) = j
nrow(nptr(k+1)) = l
nptr(k+1) = nptr(k+1) + 1
else
map(nptr(l+1)) = j
nrow(nptr(l+1)) = k
nptr(l+1) = nptr(l+1) + 1
endif
end do
end do
end subroutine make_map
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Build mapping on per block column basis from user's val to block col's lcol
! This routine uses the reordered half matrix and map from the make_map routine
subroutine lcol_map(aptr, arow, num_nodes, nodes, blocks, lmap, map, amap, st)
! Reordered lower triangle of reordered matrix held using aptr and arow
integer, dimension(:), intent(in) :: aptr
integer, dimension(:), intent(in) :: arow
integer, intent(in) :: num_nodes
type(node_type), dimension(-1:), intent(in) :: nodes ! Node info
type(block_type), dimension(:), intent(in) :: blocks ! block info
type(lmap_type), dimension(:), intent(out) :: lmap ! output lcol map
integer, dimension(:), intent(out) :: map ! work array
integer, dimension(:), intent(in) :: amap ! map set up by make_map
integer, intent(out) :: st
! Local scalars
integer :: bcol ! block column
integer :: cb ! Temporary variable
integer :: col ! do loop index
integer(long) :: dblk ! set to keep%nodes(snode)%blk_sa (first block
! in snode which is, of course, a diagonal block)
integer :: en ! set keep%nodes(snode)%en
integer(long) :: i ! Temporary variable. global row index
integer :: j ! Temporary variable
integer :: l_nb ! set to keep%nodes(snode)%nb
integer(long) :: offset
integer :: sa ! set keep%nodes(snode)%sa
integer :: snode ! node
integer :: swidth ! set to keep%blocks(dblk)%blkn (number of columns
! in block column to which dblk belongs)