From a4f0b5fb95a596e315b561617e869f71774247ae Mon Sep 17 00:00:00 2001 From: Dave Parker Date: Tue, 7 Nov 2023 14:16:22 -0800 Subject: [PATCH] Update ParMETIS third party software (#132) * Add gklib and copy over new code. * Set int and real widths to 32, setting to 64 does not work. * Add some READMEs. --- Code/CMake/SimVascularInternals.cmake | 1 + Code/CMake/SimVascularOptions.cmake | 1 + Code/CMake/SimVascularThirdParty.cmake | 8 + Code/Source/svFSI/SPLIT.c | 10 +- Code/Source/svFSI/distribute.cpp | 4 +- Code/ThirdParty/README | 2 - Code/ThirdParty/README.md | 18 + Code/ThirdParty/gklib_svfsi/CMakeLists.txt | 1 + Code/ThirdParty/gklib_svfsi/README.md | 5 + .../gklib_svfsi/simvascular_gklib_svfsi.h.in | 37 + .../simvascular_gklib_svfsi/CMakeLists.txt | 41 + .../simvascular_gklib_svfsi/GKlib.h | 85 + .../gklib_svfsi/simvascular_gklib_svfsi/b64.c | 95 + .../simvascular_gklib_svfsi/blas.c | 37 + .../simvascular_gklib_svfsi/cache.c | 126 + .../gklib_svfsi/simvascular_gklib_svfsi/csr.c | 3378 +++++ .../simvascular_gklib_svfsi/error.c | 214 + .../simvascular_gklib_svfsi/evaluate.c | 132 + .../simvascular_gklib_svfsi/fkvkselect.c | 142 + .../gklib_svfsi/simvascular_gklib_svfsi/fs.c | 225 + .../simvascular_gklib_svfsi/getopt.c | 855 ++ .../simvascular_gklib_svfsi/gk_arch.h | 70 + .../simvascular_gklib_svfsi/gk_defs.h | 87 + .../simvascular_gklib_svfsi/gk_externs.h | 25 + .../simvascular_gklib_svfsi/gk_getopt.h | 64 + .../simvascular_gklib_svfsi/gk_macros.h | 169 + .../simvascular_gklib_svfsi/gk_mkblas.h | 203 + .../simvascular_gklib_svfsi/gk_mkmemory.h | 142 + .../simvascular_gklib_svfsi/gk_mkpqueue.h | 440 + .../simvascular_gklib_svfsi/gk_mkpqueue2.h | 215 + .../simvascular_gklib_svfsi/gk_mkrandom.h | 123 + .../simvascular_gklib_svfsi/gk_mksort.h | 271 + .../simvascular_gklib_svfsi/gk_mkutils.h | 40 + .../simvascular_gklib_svfsi/gk_ms_inttypes.h | 301 + .../simvascular_gklib_svfsi/gk_ms_stat.h | 22 + .../simvascular_gklib_svfsi/gk_ms_stdint.h | 222 + 
.../simvascular_gklib_svfsi/gk_proto.h | 426 + .../simvascular_gklib_svfsi/gk_struct.h | 296 + .../simvascular_gklib_svfsi/gk_types.h | 38 + .../simvascular_gklib_svfsi/gk_util.c | 107 + .../simvascular_gklib_svfsi/gkregex.c | 10704 ++++++++++++++++ .../simvascular_gklib_svfsi/gkregex.h | 556 + .../simvascular_gklib_svfsi/graph.c | 1940 +++ .../simvascular_gklib_svfsi/htable.c | 247 + .../gklib_svfsi/simvascular_gklib_svfsi/io.c | 681 + .../simvascular_gklib_svfsi/itemsets.c | 210 + .../simvascular_gklib_svfsi/mcore.c | 393 + .../simvascular_gklib_svfsi/memory.c | 307 + .../simvascular_gklib_svfsi/pqueue.c | 25 + .../simvascular_gklib_svfsi/random.c | 136 + .../gklib_svfsi/simvascular_gklib_svfsi/rw.c | 103 + .../gklib_svfsi/simvascular_gklib_svfsi/seq.c | 174 + .../simvascular_gklib_svfsi/sort.c | 437 + .../simvascular_gklib_svfsi/string.c | 530 + .../simvascular_gklib_svfsi/timers.c | 52 + .../simvascular_gklib_svfsi/tokenizer.c | 77 + .../simvascular_gklib_svfsi/win32/adapt.c | 11 + .../simvascular_gklib_svfsi/win32/adapt.h | 14 + Code/ThirdParty/metis_svfsi/README.md | 6 + .../METISLib/CMakeLists.txt | 19 +- .../simvascular_metis_svfsi/METISLib/auxapi.c | 43 + .../METISLib/balance.c | 446 +- .../METISLib/bucketsort.c | 20 +- .../METISLib/ccgraph.c | 599 - .../METISLib/checkgraph.c | 265 +- .../METISLib/coarsen.c | 1159 +- .../METISLib/compress.c | 207 +- .../simvascular_metis_svfsi/METISLib/contig.c | 699 + .../simvascular_metis_svfsi/METISLib/debug.c | 359 +- .../simvascular_metis_svfsi/METISLib/defs.h | 163 +- .../simvascular_metis_svfsi/METISLib/estmem.c | 157 - .../simvascular_metis_svfsi/METISLib/fm.c | 485 +- .../METISLib/fortran.c | 112 +- .../METISLib/frename.c | 422 +- .../simvascular_metis_svfsi/METISLib/gklib.c | 120 + .../METISLib/gklib_defs.h | 53 + .../METISLib/gklib_rename.h | 122 + .../simvascular_metis_svfsi/METISLib/graph.c | 833 +- .../METISLib/initpart.c | 687 +- .../simvascular_metis_svfsi/METISLib/kmetis.c | 618 +- .../METISLib/kvmetis.c | 
129 - .../simvascular_metis_svfsi/METISLib/kwayfm.c | 2520 +++- .../METISLib/kwayrefine.c | 860 +- .../METISLib/kwayvolfm.c | 1775 --- .../METISLib/kwayvolrefine.c | 468 - .../simvascular_metis_svfsi/METISLib/macros.h | 290 +- .../simvascular_metis_svfsi/METISLib/match.c | 267 - .../METISLib/mbalance.c | 260 - .../METISLib/mbalance2.c | 328 - .../METISLib/mcoarsen.c | 106 - .../simvascular_metis_svfsi/METISLib/mcutil.c | 330 + .../simvascular_metis_svfsi/METISLib/memory.c | 209 - .../simvascular_metis_svfsi/METISLib/mesh.c | 695 +- .../METISLib/meshpart.c | 330 +- .../simvascular_metis_svfsi/METISLib/metis.h | 378 +- .../METISLib/metislib.h | 41 + .../simvascular_metis_svfsi/METISLib/mfm.c | 341 - .../simvascular_metis_svfsi/METISLib/mfm2.c | 349 - .../METISLib/minconn.c | 729 ++ .../METISLib/mincover.c | 69 +- .../METISLib/minitpart.c | 358 - .../METISLib/minitpart2.c | 368 - .../METISLib/mkmetis.c | 124 - .../METISLib/mkwayfmh.c | 677 - .../METISLib/mkwayrefine.c | 296 - .../simvascular_metis_svfsi/METISLib/mmatch.c | 501 - .../simvascular_metis_svfsi/METISLib/mmd.c | 99 +- .../METISLib/mpmetis.c | 402 - .../METISLib/mrefine.c | 219 - .../METISLib/mrefine2.c | 55 - .../simvascular_metis_svfsi/METISLib/mutil.c | 101 - .../METISLib/myqsort.c | 547 - .../simvascular_metis_svfsi/METISLib/ometis.c | 947 +- .../METISLib/options.c | 541 + .../simvascular_metis_svfsi/METISLib/out | 107 - .../METISLib/parmetis.c | 870 +- .../METISLib/parmetis.h | 224 - .../simvascular_metis_svfsi/METISLib/pmetis.c | 483 +- .../simvascular_metis_svfsi/METISLib/pqueue.c | 579 - .../simvascular_metis_svfsi/METISLib/proto.h | 727 +- .../simvascular_metis_svfsi/METISLib/refine.c | 286 +- .../simvascular_metis_svfsi/METISLib/rename.h | 554 +- .../METISLib/separator.c | 198 +- .../simvascular_metis_svfsi/METISLib/sfm.c | 645 +- .../METISLib/srefine.c | 152 +- .../simvascular_metis_svfsi/METISLib/stat.c | 243 +- .../simvascular_metis_svfsi/METISLib/stats.c | 44 - .../METISLib/stdheaders.h | 6 +- 
.../simvascular_metis_svfsi/METISLib/struct.h | 413 +- .../METISLib/subdomains.c | 1294 -- .../METISLib/temp/metis.h | 378 +- .../METISLib/temp/metis_defs.h | 163 +- .../simvascular_metis_svfsi/METISLib/timing.c | 71 +- .../simvascular_metis_svfsi/METISLib/util.c | 507 +- .../simvascular_metis_svfsi/METISLib/wspace.c | 219 + Code/ThirdParty/parmetis_svfsi/README.md | 6 + .../ParMETISLib/CMakeLists.txt | 39 +- .../ParMETISLib/adrivers.c | 117 - .../ParMETISLib/akwayfm.c | 582 +- .../ParMETISLib/ametis.c | 287 +- .../ParMETISLib/backcompat.c | 517 - .../ParMETISLib/balancemylink.c | 207 +- .../ParMETISLib/comm.c | 400 +- .../ParMETISLib/csrmatch.c | 63 +- .../ParMETISLib/ctrl.c | 165 + .../ParMETISLib/debug.c | 126 +- .../ParMETISLib/defs.h | 11 +- .../ParMETISLib/diffutil.c | 134 +- .../ParMETISLib/fpqueue.c | 398 - .../ParMETISLib/frename.c | 270 +- .../ParMETISLib/gklib.c | 120 + .../ParMETISLib/gklib_defs.h | 53 + .../ParMETISLib/gklib_rename.h | 122 + .../ParMETISLib/gkmetis.c | 425 +- .../ParMETISLib/gkmpi.c | 404 + .../ParMETISLib/graph.c | 496 + .../ParMETISLib/grsetup.c | 241 - .../ParMETISLib/iidxsort.c | 152 - .../ParMETISLib/iintsort.c | 157 - .../ParMETISLib/ikeysort.c | 151 - .../ParMETISLib/ikeyvalsort.c | 151 - .../ParMETISLib/initbalance.c | 451 +- .../ParMETISLib/initmsection.c | 204 +- .../ParMETISLib/initpart.c | 242 +- .../ParMETISLib/kmetis.c | 301 +- .../ParMETISLib/kwaybalance.c | 456 - .../ParMETISLib/kwayfm.c | 584 - .../ParMETISLib/kwayrefine.c | 1066 +- .../ParMETISLib/macros.h | 126 +- .../ParMETISLib/match.c | 1224 +- .../ParMETISLib/mdiffusion.c | 322 +- .../ParMETISLib/memory.c | 276 - .../ParMETISLib/mesh.c | 157 +- .../ParMETISLib/mmetis.c | 99 +- .../ParMETISLib/move.c | 235 +- .../ParMETISLib/msetup.c | 42 +- .../ParMETISLib/node_refine.c | 281 +- .../ParMETISLib/ometis.c | 462 +- .../ParMETISLib/parmetis.h | 228 +- .../ParMETISLib/parmetislib.h | 24 +- .../ParMETISLib/proto.h | 530 +- .../ParMETISLib/pspases.c | 121 +- 
.../ParMETISLib/redomylink.c | 130 +- .../ParMETISLib/remap.c | 73 +- .../ParMETISLib/rename.h | 475 +- .../ParMETISLib/renumber.c | 94 + .../ParMETISLib/rmetis.c | 187 +- .../ParMETISLib/selectq.c | 137 +- .../ParMETISLib/serial.c | 813 +- .../ParMETISLib/setup.c | 235 - .../ParMETISLib/stat.c | 283 +- .../ParMETISLib/stdheaders.h | 25 - .../ParMETISLib/struct.h | 387 +- .../ParMETISLib/temp/metis_defs.h | 11 +- .../ParMETISLib/temp/parmetislib.h | 24 +- .../ParMETISLib/timer.c | 16 +- .../ParMETISLib/util.c | 700 +- .../ParMETISLib/wave.c | 305 +- .../ParMETISLib/weird.c | 632 +- .../ParMETISLib/wspace.c | 138 + .../ParMETISLib/xyzpart.c | 627 +- 201 files changed, 47395 insertions(+), 27863 deletions(-) delete mode 100644 Code/ThirdParty/README create mode 100644 Code/ThirdParty/README.md create mode 100644 Code/ThirdParty/gklib_svfsi/CMakeLists.txt create mode 100644 Code/ThirdParty/gklib_svfsi/README.md create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi.h.in create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/CMakeLists.txt create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/GKlib.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/b64.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/blas.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/cache.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/csr.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/error.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/evaluate.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/fkvkselect.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/fs.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/getopt.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_arch.h create mode 100644 
Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_defs.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_externs.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_getopt.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_macros.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkblas.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkmemory.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkpqueue.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkpqueue2.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkrandom.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mksort.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkutils.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_ms_inttypes.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_ms_stat.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_ms_stdint.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_proto.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_struct.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_types.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_util.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gkregex.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gkregex.h create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/graph.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/htable.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/io.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/itemsets.c create mode 100644 
Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/mcore.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/memory.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/pqueue.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/random.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/rw.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/seq.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/sort.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/string.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/timers.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/tokenizer.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/win32/adapt.c create mode 100644 Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/win32/adapt.h create mode 100644 Code/ThirdParty/metis_svfsi/README.md create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/auxapi.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/ccgraph.c create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/contig.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/estmem.c create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib.c create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib_defs.h create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib_rename.h delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kvmetis.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayvolfm.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayvolrefine.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/match.c 
delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mbalance.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mbalance2.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mcoarsen.c create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mcutil.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/memory.c create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/metislib.h delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mfm.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mfm2.c create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/minconn.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/minitpart.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/minitpart2.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkmetis.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkwayfmh.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkwayrefine.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mmatch.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mpmetis.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mrefine.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mrefine2.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mutil.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/myqsort.c create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/options.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/out delete mode 100644 
Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/parmetis.h delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/pqueue.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stats.c delete mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/subdomains.c create mode 100644 Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/wspace.c create mode 100644 Code/ThirdParty/parmetis_svfsi/README.md delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/adrivers.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/backcompat.c create mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ctrl.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/fpqueue.c create mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib.c create mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib_defs.h create mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib_rename.h create mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gkmpi.c create mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/graph.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/grsetup.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/iidxsort.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/iintsort.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ikeysort.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ikeyvalsort.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwaybalance.c delete 
mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwayfm.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/memory.c create mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/renumber.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/setup.c delete mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/stdheaders.h create mode 100644 Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/wspace.c diff --git a/Code/CMake/SimVascularInternals.cmake b/Code/CMake/SimVascularInternals.cmake index d174bea9..430ee65b 100644 --- a/Code/CMake/SimVascularInternals.cmake +++ b/Code/CMake/SimVascularInternals.cmake @@ -1,6 +1,7 @@ set(SV_LIBS THIRDPARTY_METIS_SVFSI THIRDPARTY_PARMETIS_SVFSI + THIRDPARTY_GKLIB_SVFSI THIRDPARTY_TETGEN THIRDPARTY_TINYXML THIRDPARTY_ZLIB diff --git a/Code/CMake/SimVascularOptions.cmake b/Code/CMake/SimVascularOptions.cmake index 0c3fcb2e..826e4a4c 100644 --- a/Code/CMake/SimVascularOptions.cmake +++ b/Code/CMake/SimVascularOptions.cmake @@ -58,6 +58,7 @@ option(SV_USE_NOTIMER "Use notimer" ON) # Solver Build Options (Modules) option(SV_USE_EIGEN_SVFSI "Use Eigen headers" ON) option(SV_USE_METIS_SVFSI "Use metis_svfsi Library" ON) +option(SV_USE_GKLIB_SVFSI "Use GKlib_svfsi Library" ON) option(SV_USE_PARMETIS_SVFSI "Use parmetis_svfsi Library" ON) option(SV_USE_TETGEN "Use tetgen Library" ON) diff --git a/Code/CMake/SimVascularThirdParty.cmake b/Code/CMake/SimVascularThirdParty.cmake index 83e4da3b..e693acb1 100644 --- a/Code/CMake/SimVascularThirdParty.cmake +++ b/Code/CMake/SimVascularThirdParty.cmake @@ -1,4 +1,12 @@ #----------------------------------------------------------------------------- +# GKLIB_SVFSI +if(SV_USE_GKLIB_SVFSI) + set(USE_GKLIB_SVFSI ON) + simvascular_third_party(gklib_svfsi) + # require to be built here + set(GKLIB__SVFSI_LIBRARY 
${SV_LIB_THIRDPARTY_GKLIB_SVFSI_NAME}) +endif() + # METIS_SVFSI if(SV_USE_METIS_SVFSI) set(USE_METIS_SVFSI ON) diff --git a/Code/Source/svFSI/SPLIT.c b/Code/Source/svFSI/SPLIT.c index 697bac6f..228d0ecd 100644 --- a/Code/Source/svFSI/SPLIT.c +++ b/Code/Source/svFSI/SPLIT.c @@ -40,7 +40,7 @@ #include"parmetislib.h" int split_(int *nElptr, int *eNoNptr, int *eNoNbptr, int *IEN, - int *nPartsPtr, idxtype *iElmdist, float *iWgt, idxtype *part) + int *nPartsPtr, idx_t *iElmdist, float *iWgt, idx_t *part) { int i, e, a, nEl=*nElptr, eNoN=*eNoNptr, eNoNb=*eNoNbptr, @@ -48,12 +48,12 @@ int split_(int *nElptr, int *eNoNptr, int *eNoNbptr, int *IEN, ncommonnodes, options[10], *exRanks, nExRanks, *map, edgecut; float ubvec[MAXNCON], *wgt; - idxtype *eptr, *eind, *elmdist; + idx_t *eptr, *eind, *elmdist; map = (int *)malloc(nTasks*sizeof(int)); exRanks = (int *)malloc(nTasks*sizeof(int)); wgt = (float *)malloc(nTasks*sizeof(float)); - elmdist = (idxtype *)malloc((nTasks+1)*sizeof(idxtype)); + elmdist = (idx_t *)malloc((nTasks+1)*sizeof(idx_t)); MPI_Group newGrp, tmpGrp; MPI_Comm comm; @@ -92,8 +92,8 @@ int split_(int *nElptr, int *eNoNptr, int *eNoNbptr, int *IEN, return -1; } - eptr = (idxtype *)malloc((nEl+1)*sizeof(idxtype)); - eind = (idxtype *)malloc(nEl*eNoN*sizeof(idxtype)); + eptr = (idx_t *)malloc((nEl+1)*sizeof(idx_t)); + eind = (idx_t *)malloc(nEl*eNoN*sizeof(idx_t)); for (e=0; e<=nEl; e++) { eptr[e] = e*eNoN; diff --git a/Code/Source/svFSI/distribute.cpp b/Code/Source/svFSI/distribute.cpp index eca9cada..d865d003 100644 --- a/Code/Source/svFSI/distribute.cpp +++ b/Code/Source/svFSI/distribute.cpp @@ -62,7 +62,7 @@ void distribute(Simulation* simulation) auto& com_mod = simulation->com_mod; auto& cm = com_mod.cm; - #define n_debug_distribute + #define debug_distribute #ifdef debug_distribute DebugMsg dmsg(__func__, com_mod.cm.idcm()); dmsg.banner(); @@ -1434,7 +1434,7 @@ void part_msh(Simulation* simulation, int iM, mshType& lM, Vector& gmtl, in int num_proc = 
cm.np(); int task_id = cm.idcm(); - #define n_dbg_part_msh + #define dbg_part_msh #ifdef dbg_part_msh DebugMsg dmsg(__func__, com_mod.cm.idcm()); dmsg.banner(); diff --git a/Code/ThirdParty/README b/Code/ThirdParty/README deleted file mode 100644 index 73102fb9..00000000 --- a/Code/ThirdParty/README +++ /dev/null @@ -1,2 +0,0 @@ -The files in this directory are included only for convience when compiling SimVascular. They have their own license agreements. Only minimal changes have been made to allow their use in SimVascular. -Please see README.simvascular in each directory for more details. \ No newline at end of file diff --git a/Code/ThirdParty/README.md b/Code/ThirdParty/README.md new file mode 100644 index 00000000..7c6e0b7e --- /dev/null +++ b/Code/ThirdParty/README.md @@ -0,0 +1,18 @@ +This directory contains the source for external applications used by svFSIplus. The applications are compiled and liked with svFSIplus. + +Each application has its own license agreement. + +Applications + +eigen - A header-only application for matrix objects. + +gklib_svfsi - A library used by METIS and ParMETIS applications. + +metis_svfsi - The METIS mesh partitioning application used by ParMETIS. + +parmetis_svfsi - The ParMETIS parallel mesh partitioning application. + +tetgen - A mesh generatin application. + +tinyxml - A header-only application used to read and write XML files. + diff --git a/Code/ThirdParty/gklib_svfsi/CMakeLists.txt b/Code/ThirdParty/gklib_svfsi/CMakeLists.txt new file mode 100644 index 00000000..61844e2d --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/CMakeLists.txt @@ -0,0 +1 @@ +simvascular_third_party(gklib_svfsi) diff --git a/Code/ThirdParty/gklib_svfsi/README.md b/Code/ThirdParty/gklib_svfsi/README.md new file mode 100644 index 00000000..761a500d --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/README.md @@ -0,0 +1,5 @@ + +The GKlib library contains various helper routines used by METIS and ParMETIS. 
+ +The METIS-v5.1.1-DistDGL-0.5 version of the source was downloaded from https://github.com/KarypisLab/GKlib. + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi.h.in b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi.h.in new file mode 100644 index 00000000..bdf222b8 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi.h.in @@ -0,0 +1,37 @@ +/*========================================================================= + * + * Copyright (c) 2014-2015 The Regents of the University of California. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject + * to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + *=========================================================================*/ + +#ifndef __simvascular_GKlib_svfsi_svfsi_h +#define __simvascular_GKlib_svfsi_svfsi_h + +# hardcoded to build, update if want to allow system version +#include + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/CMakeLists.txt b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/CMakeLists.txt new file mode 100644 index 00000000..f2cc86ad --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required(VERSION 2.8.8) + +project(GKLIB_SVFSI) + +#set default build type to relwithdebinfo, also sets flags O2 on linux +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.") + set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Choose the type of build." FORCE) + mark_as_advanced(CMAKE_BUILD_TYPE) + # Set the possible values of build type for cmake-gui + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" + "MinSizeRel" "RelWithDebInfo") +endif() + +if(WIN32) + add_definitions("/Zi") + add_definitions("-DWINDOWS") +endif() + +include_directories(./) +include_directories(${MPI_C_INCLUDE_PATH}) + +set(CSRCS b64.c blas.c cache.c csr.c error.c evaluate.c + fkvkselect.c fs.c getopt.c gk_util.c gkregex.c graph.c + htable.c io.c itemsets.c mcore.c memory.c pqueue.c + random.c rw.c seq.c sort.c string.c timers.c tokenizer.c +) + +if(SV_USE_THIRDPARTY_SHARED_LIBRARIES) + add_library(${GKLIB_SVFSI_LIBRARY_NAME} SHARED ${CSRCS}) +else() + add_library(${GKLIB_SVFSI_LIBRARY_NAME} STATIC ${CSRCS}) +endif() + +if(SV_INSTALL_LIBS) + install(TARGETS ${GKLIB_SVFSI_LIBRARY_NAME} + RUNTIME DESTINATION ${SV_INSTALL_RUNTIME_DIR} COMPONENT ThirdPartyExecutables + LIBRARY DESTINATION ${SV_INSTALL_LIBRARY_DIR} COMPONENT ThirdPartyLibraries + ARCHIVE DESTINATION ${SV_INSTALL_ARCHIVE_DIR} COMPONENT ThirdPartyLibraries) +endif() 
+ diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/GKlib.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/GKlib.h new file mode 100644 index 00000000..9278fe41 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/GKlib.h @@ -0,0 +1,85 @@ +/* + * GKlib.h + * + * George's library of most frequently used routines + * + * $Id: GKlib.h 14866 2013-08-03 16:40:04Z karypis $ + * + */ + +#ifndef _GKLIB_H_ +#define _GKLIB_H_ 1 + +#define GKMSPACE + +#if defined(_MSC_VER) +#define __MSC__ +#endif +#if defined(__ICC) +#define __ICC__ +#endif + + +#include "gk_arch.h" /*!< This should be here, prior to the includes */ + + +/************************************************************************* +* Header file inclusion section +**************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__WITHPCRE__) + #include +#else + #if defined(USE_GKREGEX) + #include "gkregex.h" + #else + #include + #endif /* defined(USE_GKREGEX) */ +#endif /* defined(__WITHPCRE__) */ + + + +#if defined(__OPENMP__) +#include +#endif + + + + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + + +#endif /* GKlib.h */ + + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/b64.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/b64.c new file mode 100644 index 00000000..afacd68a --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/b64.c @@ -0,0 +1,95 @@ +/*! +\file b64.c +\brief This file contains some simple 8bit-to-6bit encoding/deconding routines + +Most of these routines are outdated and should be converted using glibc's equivalent +routines. 
+ +\date Started 2/22/05 +\author George +\version\verbatim $Id: b64.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim + +\verbatim +$Copyright$ +$License$ +\endverbatim + +*/ + + +#include "GKlib.h" + +#define B64OFFSET 48 /* This is the '0' number */ + + +/****************************************************************************** +* Encode 3 '8-bit' binary bytes as 4 '6-bit' characters +*******************************************************************************/ +void encodeblock(unsigned char *in, unsigned char *out) +{ + out[0] = (in[0] >> 2); + out[1] = (((in[0] & 0x03) << 4) | (in[1] >> 4)); + out[2] = (((in[1] & 0x0f) << 2) | (in[2] >> 6)); + out[3] = (in[2] & 0x3f); + + out[0] += B64OFFSET; + out[1] += B64OFFSET; + out[2] += B64OFFSET; + out[3] += B64OFFSET; + +// printf("%c %c %c %c %2x %2x %2x %2x %2x %2x %2x\n", out[0], out[1], out[2], out[3], out[0], out[1], out[2], out[3], in[0], in[1], in[2]); +} + +/****************************************************************************** +* Decode 4 '6-bit' characters into 3 '8-bit' binary bytes +*******************************************************************************/ +void decodeblock(unsigned char *in, unsigned char *out) +{ + in[0] -= B64OFFSET; + in[1] -= B64OFFSET; + in[2] -= B64OFFSET; + in[3] -= B64OFFSET; + + out[0] = (in[0] << 2 | in[1] >> 4); + out[1] = (in[1] << 4 | in[2] >> 2); + out[2] = (in[2] << 6 | in[3]); +} + + +/****************************************************************************** +* This function encodes an input array of bytes into a base64 encoding. Memory +* for the output array is assumed to have been allocated by the calling program +* and be sufficiently large. The output string is NULL terminated. 
+*******************************************************************************/ +void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer) +{ + int i, j; + + if (nbytes%3 != 0) + gk_errexit(SIGERR, "GKEncodeBase64: Input buffer size should be a multiple of 3! (%d)\n", nbytes); + + for (j=0, i=0; i + + + +/*************************************************************************/ +/*! Use the templates to generate BLAS routines for the scalar data types */ +/*************************************************************************/ +GK_MKBLAS(gk_c, char, int) +GK_MKBLAS(gk_i, int, int) +GK_MKBLAS(gk_i32, int32_t, int32_t) +GK_MKBLAS(gk_i64, int64_t, int64_t) +GK_MKBLAS(gk_z, ssize_t, ssize_t) +GK_MKBLAS(gk_zu, size_t, size_t) +GK_MKBLAS(gk_f, float, float) +GK_MKBLAS(gk_d, double, double) +GK_MKBLAS(gk_idx, gk_idx_t, gk_idx_t) + + + + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/cache.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/cache.c new file mode 100644 index 00000000..932e36d9 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/cache.c @@ -0,0 +1,126 @@ +/*! +\file +\brief Functions dealing with simulating cache behavior for performance + modeling and analysis; + +\date Started 4/13/18 +\author George +\author Copyright 1997-2011, Regents of the University of Minnesota +\version $Id: cache.c 21991 2018-04-16 03:08:12Z karypis $ +*/ + +#include + + +/*************************************************************************/ +/*! 
This function creates a cache
+    \param nway is the number of slots that gk_cacheLoad() scans per lookup.
+    \param lnbits is the number of low-order address bits that
+           gk_cacheLoad() shifts away before the lookup.
+    \param cnbits is the number of index bits; the cache gets 2^cnbits
+           index values and 2^cnbits * nway slots in total.
+    \returns the new cache with all of its counters and slots zeroed.
+ */
+/*************************************************************************/
+gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits)
+{
+  gk_cache_t *cache;
+
+  cache = (gk_cache_t *)gk_malloc(sizeof(gk_cache_t), "gk_cacheCreate: cache");
+  memset(cache, 0, sizeof(gk_cache_t));
+
+  cache->nway   = nway;
+  cache->lnbits = lnbits;
+  cache->cnbits = cnbits;
+
+  /* shift a size_t, not an int, so that cnbits >= 31 does not overflow */
+  cache->csize  = ((size_t)1)<<cnbits;
+  cache->cmask  = cache->csize-1;
+
+  cache->latimes = gk_ui64smalloc(cache->csize*nway, 0, "gk_cacheCreate: latimes");
+  cache->clines  = gk_zusmalloc(cache->csize*nway, 0, "gk_cacheCreate: clines");
+
+  return cache;
+}
+
+
+/*************************************************************************/
+/*! This function resets a cache
+    The hit/miss counters, the access clock and every slot are zeroed.
+    The clock has to be cleared together with the counters because
+    gk_cacheGetHitRate() divides the number of hits by the clock.
+ */
+/*************************************************************************/
+void gk_cacheReset(gk_cache_t *cache)
+{
+  cache->nhits   = 0;
+  cache->nmisses = 0;
+  cache->clock   = 0;
+
+  gk_ui64set(cache->csize*cache->nway, 0, cache->latimes);
+  gk_zuset(cache->csize*cache->nway, 0, cache->clines);
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! This function destroys a cache.
+    \param r_cache is the address of the cache pointer. The pointer is set
+           to NULL on return. A NULL r_cache or a NULL *r_cache is a no-op.
+ */
+/*************************************************************************/
+void gk_cacheDestroy(gk_cache_t **r_cache)
+{
+  gk_cache_t *cache;
+
+  if (r_cache == NULL || *r_cache == NULL)
+    return;
+
+  cache = *r_cache;
+  gk_free((void **)&cache->clines, &cache->latimes, &cache, LTERM);
+
+  *r_cache = NULL;
+}
+
+
+/*************************************************************************/
+/*! This function simulates a load(ptr) operation.
+    The address is reduced to a line address by dropping its lnbits
+    low-order bits. The low cnbits bits of the line address select a set
+    of nway consecutive slots. A slot that already holds the line address
+    counts as a hit; otherwise the slot with the oldest access time is
+    overwritten and a miss is counted. Every call advances the clock.
+
+    \param cache is the cache being simulated.
+    \param addr is the address that is loaded.
+    \returns always 1.
+ */
+/*************************************************************************/
+int gk_cacheLoad(gk_cache_t *cache, size_t addr)
+{
+  uint32_t i, nway=cache->nway;
+  size_t lru=0, base;
+  size_t *clines;
+  uint64_t *latimes;
+
+  addr = addr>>(cache->lnbits);
+
+  /* The arrays hold csize*nway slots, so set s owns [s*nway, (s+1)*nway).
+     Using s alone as the base made neighbouring sets share slots. */
+  base    = (addr&(cache->cmask))*nway;
+  clines  = cache->clines + base;
+  latimes = cache->latimes + base;
+
+  cache->clock++;
+
+  /* hit: refresh the access time of the matching slot */
+  for (i=0; i<nway; i++) {
+    if (clines[i] == addr) {
+      cache->nhits++;
+      latimes[i] = cache->clock;
+      return 1;
+    }
+  }
+
+  /* miss: replace the least recently used slot (never-used slots have time 0) */
+  for (i=1; i<nway; i++) {
+    if (latimes[i] < latimes[lru])
+      lru = i;
+  }
+
+  cache->nmisses++;
+  clines[lru]  = addr;
+  latimes[lru] = cache->clock;
+
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! This function returns the cache's hitrate
+    \returns nhits/(clock+1); the +1 keeps the ratio defined before the
+             first load.
+ */
+/*************************************************************************/
+double gk_cacheGetHitRate(gk_cache_t *cache)
+{
+  return ((double)cache->nhits)/((double)(cache->clock+1));
+}
+
diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/csr.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/csr.c
new file mode 100644
index 00000000..7e92a0c3
--- /dev/null
+++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/csr.c
@@ -0,0 +1,3378 @@
+/*!
+ * \file
+ *
+ * \brief Various routines with dealing with CSR matrices
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: csr.c 21044 2017-05-24 22:50:32Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+#define OMPMINOPS       50000
+
+/*************************************************************************/
+/*! Allocate memory for a CSR matrix and initializes it
+    \returns the allocated matrix. The various fields are set to NULL.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Create()
+{
+  gk_csr_t *mat;
+
+  mat = (gk_csr_t *)gk_malloc(sizeof(gk_csr_t), "gk_csr_Create: mat");
+  if (mat != NULL)
+    gk_csr_Init(mat);
+
+  return mat;
+}
+
+
+/*************************************************************************/
+/*! Initializes the matrix
+    Every byte of the structure is cleared, which leaves nrows and ncols
+    at 0.
+    \param mat is the matrix to be initialized. Arrays that it may already
+           own are not freed.
+*/
+/*************************************************************************/
+void gk_csr_Init(gk_csr_t *mat)
+{
+  memset(mat, 0, sizeof(gk_csr_t));
+}
+
+
+/*************************************************************************/
+/*! Frees all the memory allocated for matrix.
+    \param mat is the address of the pointer to the matrix to be freed.
+           A NULL mat or a NULL *mat is a no-op.
+*/
+/*************************************************************************/
+void gk_csr_Free(gk_csr_t **mat)
+{
+  if (mat == NULL || *mat == NULL)
+    return;
+
+  gk_csr_FreeContents(*mat);
+  gk_free((void **)mat, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Frees only the memory allocated for the matrix's different fields and
+    sets them to NULL.
+    \param mat is the matrix whose contents will be freed. The structure
+           itself is not released.
+*/
+/*************************************************************************/
+void gk_csr_FreeContents(gk_csr_t *mat)
+{
+  gk_free((void **)&mat->rowptr, &mat->rowind, &mat->rowval,
+          &mat->rowids, &mat->rlabels, &mat->rmap,
+          &mat->colptr, &mat->colind, &mat->colval,
+          &mat->colids, &mat->clabels, &mat->cmap,
+          &mat->rnorms, &mat->cnorms, &mat->rsums, &mat->csums,
+          &mat->rsizes, &mat->csizes, &mat->rvols, &mat->cvols,
+          &mat->rwgts, &mat->cwgts,
+          LTERM);
+}
+
+
+/*************************************************************************/
+/*! Returns a copy of a matrix.
+    \param mat is the matrix to be duplicated.
+    \returns the newly created copy of the matrix.
+    Only the arrays that are present (non-NULL) in mat are allocated and
+    copied; the remaining fields of the copy stay as gk_csr_Create() left
+    them.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Dup(gk_csr_t *mat)
+{
+  gk_csr_t *dup = gk_csr_Create();
+
+  dup->nrows = mat->nrows;
+  dup->ncols = mat->ncols;
+
+  /* ---- row view: one entry per row (rowptr has one extra) ---- */
+  if (mat->rowptr != NULL) {
+    dup->rowptr = gk_zcopy(mat->nrows+1, mat->rowptr,
+                      gk_zmalloc(mat->nrows+1, "gk_csr_Dup: rowptr"));
+  }
+  if (mat->rowids != NULL) {
+    dup->rowids = gk_icopy(mat->nrows, mat->rowids,
+                      gk_imalloc(mat->nrows, "gk_csr_Dup: rowids"));
+  }
+  if (mat->rlabels != NULL) {
+    dup->rlabels = gk_icopy(mat->nrows, mat->rlabels,
+                       gk_imalloc(mat->nrows, "gk_csr_Dup: rlabels"));
+  }
+  if (mat->rnorms != NULL) {
+    dup->rnorms = gk_fcopy(mat->nrows, mat->rnorms,
+                      gk_fmalloc(mat->nrows, "gk_csr_Dup: rnorms"));
+  }
+  if (mat->rsums != NULL) {
+    dup->rsums = gk_fcopy(mat->nrows, mat->rsums,
+                     gk_fmalloc(mat->nrows, "gk_csr_Dup: rsums"));
+  }
+  if (mat->rsizes != NULL) {
+    dup->rsizes = gk_fcopy(mat->nrows, mat->rsizes,
+                      gk_fmalloc(mat->nrows, "gk_csr_Dup: rsizes"));
+  }
+  if (mat->rvols != NULL) {
+    dup->rvols = gk_fcopy(mat->nrows, mat->rvols,
+                     gk_fmalloc(mat->nrows, "gk_csr_Dup: rvols"));
+  }
+  if (mat->rwgts != NULL) {
+    dup->rwgts = gk_fcopy(mat->nrows, mat->rwgts,
+                     gk_fmalloc(mat->nrows, "gk_csr_Dup: rwgts"));
+  }
+
+  /* ---- row view: one entry per non-zero, rowptr[nrows] of them ---- */
+  if (mat->rowind != NULL) {
+    dup->rowind = gk_icopy(mat->rowptr[mat->nrows], mat->rowind,
+                      gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Dup: rowind"));
+  }
+  if (mat->rowval != NULL) {
+    dup->rowval = gk_fcopy(mat->rowptr[mat->nrows], mat->rowval,
+                      gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Dup: rowval"));
+  }
+
+  /* ---- column view: one entry per column (colptr has one extra) ---- */
+  if (mat->colptr != NULL) {
+    dup->colptr = gk_zcopy(mat->ncols+1, mat->colptr,
+                      gk_zmalloc(mat->ncols+1, "gk_csr_Dup: colptr"));
+  }
+  if (mat->colids != NULL) {
+    dup->colids = gk_icopy(mat->ncols, mat->colids,
+                      gk_imalloc(mat->ncols, "gk_csr_Dup: colids"));
+  }
+  if (mat->clabels != NULL) {
+    dup->clabels = gk_icopy(mat->ncols, mat->clabels,
+                       gk_imalloc(mat->ncols, "gk_csr_Dup: clabels"));
+  }
+  if (mat->cnorms != NULL) {
+    dup->cnorms = gk_fcopy(mat->ncols, mat->cnorms,
+                      gk_fmalloc(mat->ncols, "gk_csr_Dup: cnorms"));
+  }
+  if (mat->csums != NULL) {
+    dup->csums = gk_fcopy(mat->ncols, mat->csums,
+                     gk_fmalloc(mat->ncols, "gk_csr_Dup: csums"));
+  }
+  if (mat->csizes != NULL) {
+    dup->csizes = gk_fcopy(mat->ncols, mat->csizes,
+                      gk_fmalloc(mat->ncols, "gk_csr_Dup: csizes"));
+  }
+  if (mat->cvols != NULL) {
+    dup->cvols = gk_fcopy(mat->ncols, mat->cvols,
+                     gk_fmalloc(mat->ncols, "gk_csr_Dup: cvols"));
+  }
+  if (mat->cwgts != NULL) {
+    dup->cwgts = gk_fcopy(mat->ncols, mat->cwgts,
+                     gk_fmalloc(mat->ncols, "gk_csr_Dup: cwgts"));
+  }
+
+  /* ---- column view: one entry per non-zero, colptr[ncols] of them ---- */
+  if (mat->colind != NULL) {
+    dup->colind = gk_icopy(mat->colptr[mat->ncols], mat->colind,
+                      gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Dup: colind"));
+  }
+  if (mat->colval != NULL) {
+    dup->colval = gk_fcopy(mat->colptr[mat->ncols], mat->colval,
+                      gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Dup: colval"));
+  }
+
+  return dup;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix containing a set of consecutive rows.
+    \param mat is the original matrix.
+    \param rstart is the starting row.
+    \param nrows is the number of rows from rstart to extract.
+    \returns the row structure of the newly created submatrix.
+    NULL is returned, and nothing is allocated, when rstart or nrows is
+    negative, when rows [rstart, rstart+nrows) do not all exist in mat, or
+    when mat has no row structure (rowptr is NULL).
+    Of the per-row arrays only rowids, rnorms and rsums are carried over,
+    and each of them only if mat has it. The column indices (rowind) and
+    values (rowval) are copied when present. rowptr is rebased so that it
+    starts at 0.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows)
+{
+  ssize_t i, first, nnz;
+  gk_csr_t *nmat;
+
+  /* validate the range; written so that rstart+nrows is never formed
+     before it is known not to overflow */
+  if (rstart < 0 || nrows < 0 || rstart > mat->nrows || nrows > mat->nrows-rstart)
+    return NULL;
+  if (mat->rowptr == NULL)
+    return NULL;
+
+  first = mat->rowptr[rstart];
+  nnz   = mat->rowptr[rstart+nrows]-first;
+
+  nmat = gk_csr_Create();
+
+  nmat->nrows = nrows;
+  nmat->ncols = mat->ncols;
+
+  /* copy the row structure, shifting the offsets to start from 0 */
+  nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ExtractSubmatrix: rowptr");
+  for (i=0; i<=nrows; i++)
+    nmat->rowptr[i] = mat->rowptr[rstart+i]-first;
+
+  if (mat->rowids)
+    nmat->rowids = gk_icopy(nrows, mat->rowids+rstart,
+                       gk_imalloc(nrows, "gk_csr_ExtractSubmatrix: rowids"));
+  if (mat->rnorms)
+    nmat->rnorms = gk_fcopy(nrows, mat->rnorms+rstart,
+                       gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rnorms"));
+  if (mat->rsums)
+    nmat->rsums = gk_fcopy(nrows, mat->rsums+rstart,
+                      gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rsums"));
+
+  if (mat->rowind)
+    nmat->rowind = gk_icopy(nnz, mat->rowind+first,
+                       gk_imalloc(nnz, "gk_csr_ExtractSubmatrix: rowind"));
+  if (mat->rowval)
+    nmat->rowval = gk_fcopy(nnz, mat->rowval+first,
+                       gk_fmalloc(nnz, "gk_csr_ExtractSubmatrix: rowval"));
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix containing a certain set of rows.
+    \param mat is the original matrix.
+    \param nrows is the number of rows to extract.
+    \param rind is the set of row numbers to extract.
+    \returns the row structure of the newly created submatrix.
+    The rows appear in the submatrix in the order in which they are listed
+    in rind. The entries of rind are used as given and are not
+    range-checked. mat must have rowptr and rowind; rowval is copied only
+    if mat has it.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind)
+{
+  ssize_t i, ii, len, nnz;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+
+  nmat->nrows = nrows;
+  nmat->ncols = mat->ncols;
+
+  /* first pass: size of the result */
+  for (nnz=0, i=0; i<nrows; i++)
+    nnz += mat->rowptr[rind[i]+1]-mat->rowptr[rind[i]];
+
+  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractRows: rowptr");
+  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractRows: rowind");
+  if (mat->rowval)
+    nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractRows: rowval");
+
+  /* second pass: append the selected rows one after the other */
+  nmat->rowptr[0] = 0;
+  for (nnz=0, ii=0; ii<nrows; ii++) {
+    i   = rind[ii];
+    len = mat->rowptr[i+1]-mat->rowptr[i];
+
+    gk_icopy(len, mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
+    if (mat->rowval)
+      gk_fcopy(len, mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
+
+    nnz += len;
+    nmat->rowptr[ii+1] = nnz;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix corresponding to a specified partitioning of rows.
+    \param mat is the original matrix.
+    \param part is the partitioning vector of the rows.
+    \param pid is the partition ID that will be extracted.
+    \returns the row structure of the newly created submatrix.
+    Row i of mat is kept when part[i] == pid; the kept rows retain their
+    relative order. mat must have rowptr and rowind; rowval is copied only
+    if mat has it.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid)
+{
+  ssize_t i, j, len, nnz;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+
+  nmat->nrows = 0;
+  nmat->ncols = mat->ncols;
+
+  /* first pass: count the selected rows and their non-zeros */
+  for (nnz=0, i=0; i<mat->nrows; i++) {
+    if (part[i] == pid) {
+      nmat->nrows++;
+      nnz += mat->rowptr[i+1]-mat->rowptr[i];
+    }
+  }
+
+  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractPartition: rowptr");
+  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractPartition: rowind");
+  if (mat->rowval)
+    nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractPartition: rowval");
+
+  /* second pass: append the selected rows one after the other */
+  nmat->rowptr[0] = 0;
+  for (nnz=0, j=0, i=0; i<mat->nrows; i++) {
+    if (part[i] != pid)
+      continue;
+
+    len = mat->rowptr[i+1]-mat->rowptr[i];
+
+    gk_icopy(len, mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
+    if (mat->rowval)
+      gk_fcopy(len, mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
+
+    nnz += len;
+    nmat->rowptr[++j] = nnz;
+  }
+  ASSERT(j == nmat->nrows);
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Splits the matrix into multiple sub-matrices based on the provided
+    color array.
+    \param mat is the original matrix.
+    \param color is an array of size equal to the number of non-zeros
+           in the matrix (row-wise structure). The matrix is split into
+           as many parts as the number of colors. For meaningfull results,
+           the colors should be numbered consecutively starting from 0.
+    \returns an array of matrices for each supplied color number.
+    The number of sub-matrices is (largest value in color)+1. Every
+    sub-matrix has the dimensions of mat and receives the non-zeros whose
+    color equals its index, in their original row and order. The color
+    values are used as array indices and are not range-checked. rowval is
+    produced only if mat has it.
+*/
+/**************************************************************************/
+gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color)
+{
+  ssize_t i, j;
+  int nrows, ncolors;
+  ssize_t *rowptr;
+  int *rowind;
+  float *rowval;
+  gk_csr_t **smats;
+  gk_csr_t *smat;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  ncolors = gk_imax(rowptr[nrows], color, 1)+1;
+
+  smats = (gk_csr_t **)gk_malloc(sizeof(gk_csr_t *)*ncolors, "gk_csr_Split: smats");
+  for (i=0; i<ncolors; i++) {
+    smats[i] = gk_csr_Create();
+    smats[i]->nrows  = mat->nrows;
+    smats[i]->ncols  = mat->ncols;
+    smats[i]->rowptr = gk_zsmalloc(nrows+1, 0, "gk_csr_Split: smats[i]->rowptr");
+  }
+
+  /* count the non-zeros of each color in each row ... */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++)
+      smats[color[j]]->rowptr[i]++;
+  }
+  /* ... and turn the per-row counts into row offsets */
+  for (i=0; i<ncolors; i++)
+    MAKECSR(j, nrows, smats[i]->rowptr);
+
+  for (i=0; i<ncolors; i++) {
+    smats[i]->rowind = gk_imalloc(smats[i]->rowptr[nrows], "gk_csr_Split: smats[i]->rowind");
+    if (rowval)
+      smats[i]->rowval = gk_fmalloc(smats[i]->rowptr[nrows], "gk_csr_Split: smats[i]->rowval");
+  }
+
+  /* scatter the non-zeros; rowptr[i] is used as the write cursor of row i */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      smat = smats[color[j]];
+      smat->rowind[smat->rowptr[i]] = rowind[j];
+      if (rowval)
+        smat->rowval[smat->rowptr[i]] = rowval[j];
+      smat->rowptr[i]++;
+    }
+  }
+
+  /* the cursors now sit at the end of each row; shift them back */
+  for (i=0; i<ncolors; i++)
+    SHIFTCSR(j, nrows, smats[i]->rowptr);
+
+  return smats;
+}
+
+
+/**************************************************************************/
+/*! Determines the format of the CSR matrix based on the extension.
+    \param filename is the name of the file.
+    \param the user-supplied format.
+    \returns the type. The extension of the file directly maps to the
+        name of the format.
+    A format other than GK_CSR_FMT_AUTO is returned unchanged. With
+    GK_CSR_FMT_AUTO the recognised extensions are csr, ijv, cluto, metis,
+    binrow, bincol and bijv; anything else, including a NULL filename,
+    yields GK_CSR_FMT_CSR.
+*/
+/**************************************************************************/
+int gk_csr_DetermineFormat(char *filename, int format)
+{
+  static const struct {
+    const char *ext;
+    int fmt;
+  } known[] = {
+    {"csr",    GK_CSR_FMT_CSR},
+    {"ijv",    GK_CSR_FMT_IJV},
+    {"cluto",  GK_CSR_FMT_CLUTO},
+    {"metis",  GK_CSR_FMT_METIS},
+    {"binrow", GK_CSR_FMT_BINROW},
+    {"bincol", GK_CSR_FMT_BINCOL},
+    {"bijv",   GK_CSR_FMT_BIJV}
+  };
+  size_t i;
+  char *extension;
+
+  if (format != GK_CSR_FMT_AUTO)
+    return format;
+
+  format = GK_CSR_FMT_CSR;
+
+  /* gk_csr_Write() accepts a NULL filename (meaning stdout) and still
+     calls this function, so there may be no name to inspect */
+  if (filename == NULL)
+    return format;
+
+  extension = gk_getextname(filename);
+  if (extension == NULL)
+    return format;
+
+  for (i=0; i<sizeof(known)/sizeof(known[0]); i++) {
+    if (strcmp(extension, known[i].ext) == 0) {
+      format = known[i].fmt;
+      break;
+    }
+  }
+
+  gk_free((void **)&extension, LTERM);
+
+  return format;
+}
+
+
+/**************************************************************************/
+/*! Reads a CSR matrix from the supplied file and stores it the matrix's
+    forward structure.
+    \param filename is the file that stores the data.
+    \param format is either GK_CSR_FMT_METIS, GK_CSR_FMT_CLUTO,
+           GK_CSR_FMT_CSR, GK_CSR_FMT_BINROW, GK_CSR_FMT_BINCOL
+           specifying the type of the input format.
+           The GK_CSR_FMT_CSR does not contain a header
+           line, whereas the GK_CSR_FMT_BINROW is a binary format written
+           by gk_csr_Write() using the same format specifier.
+    \param readvals is either 1 or 0, indicating if the CSR file contains
+           values or it does not. It only applies when GK_CSR_FMT_CSR is
+           used.
+    \param numbering is either 1 or 0, indicating if the numbering of the
+           indices start from 1 or 0, respectively. If they start from 1,
+           they are automatically decreamented during input so that they
+           will start from 0. It only applies when GK_CSR_FMT_CSR is
+           used.
+    \returns the matrix that was read.
+*/ +/**************************************************************************/ +gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering) +{ + ssize_t i, k, l; + size_t nfields, nrows, ncols, nnz, fmt, ncon; + size_t lnlen; + ssize_t *rowptr; + int *rowind, *iinds, *jinds, ival; + float *rowval=NULL, *vals, fval; + int readsizes, readwgts; + char *line=NULL, *head, *tail, fmtstr[256]; + FILE *fpin; + gk_csr_t *mat=NULL; + + format = gk_csr_DetermineFormat(filename, format); + + if (!gk_fexists(filename)) + gk_errexit(SIGERR, "File %s does not exist!\n", filename); + + switch (format) { + case GK_CSR_FMT_BINROW: + mat = gk_csr_Create(); + + fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin"); + if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename); + if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename); + mat->rowptr = gk_zmalloc(mat->nrows+1, "gk_csr_Read: rowptr"); + if (fread(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpin) != mat->nrows+1) + gk_errexit(SIGERR, "Failed to read the rowptr from file %s!\n", filename); + mat->rowind = gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowind"); + if (fread(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows]) + gk_errexit(SIGERR, "Failed to read the rowind from file %s!\n", filename); + if (readvals == 1) { + mat->rowval = gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowval"); + if (fread(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows]) + gk_errexit(SIGERR, "Failed to read the rowval from file %s!\n", filename); + } + + gk_fclose(fpin); + return mat; + + break; + + case GK_CSR_FMT_BINCOL: + mat = gk_csr_Create(); + + fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin"); + if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to 
read the nrows from file %s!\n", filename); + if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename); + mat->colptr = gk_zmalloc(mat->ncols+1, "gk_csr_Read: colptr"); + if (fread(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpin) != mat->ncols+1) + gk_errexit(SIGERR, "Failed to read the colptr from file %s!\n", filename); + mat->colind = gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Read: colind"); + if (fread(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols]) + gk_errexit(SIGERR, "Failed to read the colind from file %s!\n", filename); + if (readvals) { + mat->colval = gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Read: colval"); + if (fread(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols]) + gk_errexit(SIGERR, "Failed to read the colval from file %s!\n", filename); + } + + gk_fclose(fpin); + return mat; + + break; + + + case GK_CSR_FMT_IJV: + gk_getfilestats(filename, &nrows, &nnz, NULL, NULL); + + if (readvals == 1 && 3*nrows != nnz) + gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 3.\n", nnz, readvals); + if (readvals == 0 && 2*nrows != nnz) + gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 2.\n", nnz, readvals); + + nnz = nrows; + numbering = (numbering ? - 1 : 0); + + /* read the data into three arrays */ + iinds = gk_i32malloc(nnz, "iinds"); + jinds = gk_i32malloc(nnz, "jinds"); + vals = (readvals ? 
gk_fmalloc(nnz, "vals") : NULL); + + fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin"); + for (nrows=0, ncols=0, i=0; inrows = nrows; + mat->ncols = ncols; + rowptr = mat->rowptr = gk_zsmalloc(nrows+1, 0, "rowptr"); + rowind = mat->rowind = gk_i32malloc(nnz, "rowind"); + if (readvals) + rowval = mat->rowval = gk_fmalloc(nnz, "rowval"); + + for (i=0; inrows), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename); + if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename); + if (fread(&nnz, sizeof(size_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the nnz from file %s!\n", filename); + if (fread(&readvals, sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the readvals from file %s!\n", filename); + + /* read the data into three arrays */ + iinds = gk_i32malloc(nnz, "iinds"); + jinds = gk_i32malloc(nnz, "jinds"); + vals = (readvals ? 
gk_fmalloc(nnz, "vals") : NULL); + + for (i=0; irowptr = gk_zsmalloc(mat->nrows+1, 0, "rowptr"); + rowind = mat->rowind = gk_i32malloc(nnz, "rowind"); + if (readvals) + rowval = mat->rowval = gk_fmalloc(nnz, "rowval"); + + for (i=0; inrows, rowptr); + + for (i=0; inrows, rowptr); + + gk_free((void **)&iinds, &jinds, &vals, LTERM); + + return mat; + + break; + + + /* the following are handled by a common input code, that comes after the switch */ + + case GK_CSR_FMT_CLUTO: + fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin"); + do { + if (gk_getline(&line, &lnlen, fpin) <= 0) + gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename); + } while (line[0] == '%'); + + if (sscanf(line, "%zu %zu %zu", &nrows, &ncols, &nnz) != 3) + gk_errexit(SIGERR, "Header line must contain 3 integers.\n"); + + readsizes = 0; + readwgts = 0; + readvals = 1; + numbering = 1; + + break; + + case GK_CSR_FMT_METIS: + fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin"); + do { + if (gk_getline(&line, &lnlen, fpin) <= 0) + gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename); + } while (line[0] == '%'); + + fmt = ncon = 0; + nfields = sscanf(line, "%zu %zu %zu %zu", &nrows, &nnz, &fmt, &ncon); + if (nfields < 2) + gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n"); + + ncols = nrows; + nnz *= 2; + + if (fmt > 111) + gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt); + + sprintf(fmtstr, "%03zu", fmt%1000); + readsizes = (fmtstr[0] == '1'); + readwgts = (fmtstr[1] == '1'); + readvals = (fmtstr[2] == '1'); + numbering = 1; + ncon = (ncon == 0 ? 
1 : ncon); + + break; + + case GK_CSR_FMT_CSR: + readsizes = 0; + readwgts = 0; + + gk_getfilestats(filename, &nrows, &nnz, NULL, NULL); + + if (readvals == 1 && nnz%2 == 1) + gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not even.\n", nnz, readvals); + if (readvals == 1) + nnz = nnz/2; + fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin"); + + break; + + default: + gk_errexit(SIGERR, "Unknown csr format.\n"); + return NULL; + } + + mat = gk_csr_Create(); + + mat->nrows = nrows; + + rowptr = mat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Read: rowptr"); + rowind = mat->rowind = gk_imalloc(nnz, "gk_csr_Read: rowind"); + if (readvals != 2) + rowval = mat->rowval = gk_fsmalloc(nnz, 1.0, "gk_csr_Read: rowval"); + + if (readsizes) + mat->rsizes = gk_fsmalloc(nrows, 0.0, "gk_csr_Read: rsizes"); + + if (readwgts) + mat->rwgts = gk_fsmalloc(nrows*ncon, 0.0, "gk_csr_Read: rwgts"); + + /*---------------------------------------------------------------------- + * Read the sparse matrix file + *---------------------------------------------------------------------*/ + numbering = (numbering ? 
-1 : 0); + for (ncols=0, rowptr[0]=0, k=0, i=0; irsizes[i] = (float)strtod(head, &tail); +#else + mat->rsizes[i] = strtof(head, &tail); +#endif + if (tail == head) + gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1); + if (mat->rsizes[i] < 0) + errexit("The size for vertex %zd must be >= 0\n", i+1); + head = tail; + } + + /* Read vertex weights */ + if (readwgts) { + for (l=0; lrwgts[i*ncon+l] = (float)strtod(head, &tail); +#else + mat->rwgts[i*ncon+l] = strtof(head, &tail); +#endif + if (tail == head) + errexit("The line for vertex %zd does not have enough weights " + "for the %d constraints.\n", i+1, ncon); + if (mat->rwgts[i*ncon+l] < 0) + errexit("The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l); + head = tail; + } + } + + + /* Read the rest of the row */ + while (1) { + ival = (int)strtol(head, &tail, 0); + if (tail == head) + break; + head = tail; + + if ((rowind[k] = ival + numbering) < 0) + gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i); + + ncols = gk_max(rowind[k], ncols); + + if (readvals == 1) { +#ifdef __MSC__ + fval = (float)strtod(head, &tail); +#else + fval = strtof(head, &tail); +#endif + if (tail == head) + gk_errexit(SIGERR, "Value could not be found for column! Row:%zd, NNZ:%zd\n", i, k); + head = tail; + + rowval[k] = fval; + } + k++; + } + rowptr[i+1] = k; + } + + if (format == GK_CSR_FMT_METIS) { + ASSERT(ncols+1 == mat->nrows); + mat->ncols = mat->nrows; + } + else { + mat->ncols = ncols+1; + } + + if (k != nnz) + gk_errexit(SIGERR, "gk_csr_Read: Something wrong with the number of nonzeros in " + "the input file. NNZ=%zd, ActualNNZ=%zd.\n", nnz, k); + + gk_fclose(fpin); + + gk_free((void **)&line, LTERM); + + return mat; +} + + +/**************************************************************************/ +/*! Writes the row-based structure of a matrix into a file. + \param mat is the matrix to be written, + \param filename is the name of the output file. 
+ \param format is one of: GK_CSR_FMT_CLUTO, GK_CSR_FMT_CSR, + GK_CSR_FMT_BINROW, GK_CSR_FMT_BINCOL, GK_CSR_FMT_BIJV. + \param writevals is either 1 or 0 indicating if the values will be + written or not. This is only applicable when GK_CSR_FMT_CSR + is used. + \param numbering is either 1 or 0 indicating if the internal 0-based + numbering will be shifted by one or not during output. This + is only applicable when GK_CSR_FMT_CSR is used. +*/ +/**************************************************************************/ +void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering) +{ + ssize_t i, j; + int32_t edge[2]; + FILE *fpout; + + format = gk_csr_DetermineFormat(filename, format); + + switch (format) { + case GK_CSR_FMT_METIS: + if (mat->nrows != mat->ncols || mat->rowptr[mat->nrows]%2 == 1) + gk_errexit(SIGERR, "METIS output format requires a square symmetric matrix.\n"); + + if (filename) + fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout"); + else + fpout = stdout; + + fprintf(fpout, "%d %zd\n", mat->nrows, mat->rowptr[mat->nrows]/2); + for (i=0; inrows; i++) { + for (j=mat->rowptr[i]; jrowptr[i+1]; j++) + fprintf(fpout, " %d", mat->rowind[j]+1); + fprintf(fpout, "\n"); + } + if (filename) + gk_fclose(fpout); + break; + + case GK_CSR_FMT_BINROW: + if (filename == NULL) + gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n"); + fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout"); + + fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout); + fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout); + fwrite(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpout); + fwrite(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpout); + if (writevals) + fwrite(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpout); + + gk_fclose(fpout); + return; + + break; + + case GK_CSR_FMT_BINCOL: + if (filename == NULL) + gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n"); + fpout = gk_fopen(filename, "wb", "gk_csr_Write: 
fpout"); + + fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout); + fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout); + fwrite(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpout); + fwrite(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpout); + if (writevals) + fwrite(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpout); + + gk_fclose(fpout); + return; + + break; + + case GK_CSR_FMT_IJV: + if (filename == NULL) + gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n"); + fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout"); + + numbering = (numbering ? 1 : 0); + for (i=0; inrows; i++) { + for (j=mat->rowptr[i]; jrowptr[i+1]; j++) { + if (writevals) + fprintf(fpout, "%zd %d %.8f\n", i+numbering, mat->rowind[j]+numbering, mat->rowval[j]); + else + fprintf(fpout, "%zd %d\n", i+numbering, mat->rowind[j]+numbering); + } + } + + gk_fclose(fpout); + return; + + break; + + case GK_CSR_FMT_BIJV: + if (filename == NULL) + gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n"); + fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout"); + + fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout); + fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout); + fwrite(&(mat->rowptr[mat->nrows]), sizeof(size_t), 1, fpout); + fwrite(&writevals, sizeof(int32_t), 1, fpout); + + for (i=0; inrows; i++) { + edge[0] = i; + for (j=mat->rowptr[i]; jrowptr[i+1]; j++) { + edge[1] = mat->rowind[j]; + fwrite(edge, sizeof(int32_t), 2, fpout); + if (writevals) + fwrite(&(mat->rowval[j]), sizeof(float), 1, fpout); + } + } + + gk_fclose(fpout); + return; + + break; + + default: + if (filename) + fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout"); + else + fpout = stdout; + + if (format == GK_CSR_FMT_CLUTO) { + fprintf(fpout, "%d %d %zd\n", mat->nrows, mat->ncols, mat->rowptr[mat->nrows]); + writevals = 1; + numbering = 1; + } + + for (i=0; inrows; i++) { + for (j=mat->rowptr[i]; jrowptr[i+1]; j++) { + fprintf(fpout, " %d", mat->rowind[j]+(numbering ? 
1 : 0)); + if (writevals) + fprintf(fpout, " %f", mat->rowval[j]); + } + fprintf(fpout, "\n"); + } + if (filename) + gk_fclose(fpout); + } +} + + +/*************************************************************************/ +/*! Prunes certain rows/columns of the matrix. The prunning takes place + by analyzing the row structure of the matrix. The prunning takes place + by removing rows/columns but it does not affect the numbering of the + remaining rows/columns. + + \param mat the matrix to be prunned, + \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) + of the matrix will be prunned, + \param minf is the minimum number of rows (columns) that a column (row) must + be present in order to be kept, + \param maxf is the maximum number of rows (columns) that a column (row) must + be present at in order to be kept. + \returns the prunned matrix consisting only of its row-based structure. + The input matrix is not modified. +*/ +/**************************************************************************/ +gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf) +{ + ssize_t i, j, nnz; + int nrows, ncols; + ssize_t *rowptr, *nrowptr; + int *rowind, *nrowind, *collen; + float *rowval, *nrowval; + gk_csr_t *nmat; + + nmat = gk_csr_Create(); + + nrows = nmat->nrows = mat->nrows; + ncols = nmat->ncols = mat->ncols; + + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Prune: nrowptr"); + nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Prune: nrowind"); + nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_Prune: nrowval"); + + + switch (what) { + case GK_CSR_COL: + collen = gk_ismalloc(ncols, 0, "gk_csr_Prune: collen"); + + for (i=0; i= minf && collen[i] <= maxf ? 
1 : 0); + + nrowptr[0] = 0; + for (nnz=0, i=0; i= minf && rowptr[i+1]-rowptr[i] <= maxf) { + for (j=rowptr[i]; jnrows = mat->nrows; + ncols = nmat->ncols = mat->ncols; + + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + colptr = mat->colptr; + colind = mat->colind; + colval = mat->colval; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_LowFilter: nrowptr"); + nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_LowFilter: nrowind"); + nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_LowFilter: nrowval"); + + + switch (what) { + case GK_CSR_COL: + if (mat->colptr == NULL) + gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n"); + + gk_zcopy(nrows+1, rowptr, nrowptr); + + for (i=0; irowptr == NULL) + gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n"); + + for (i=0; inrows = mat->nrows; + ncols = nmat->ncols = mat->ncols; + + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + colptr = mat->colptr; + colind = mat->colind; + colval = mat->colval; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_LowFilter: nrowptr"); + nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_LowFilter: nrowind"); + nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_LowFilter: nrowval"); + + + switch (what) { + case GK_CSR_COL: + if (mat->colptr == NULL) + gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n"); + + cand = gk_fkvmalloc(nrows, "gk_csr_LowFilter: cand"); + + gk_zcopy(nrows+1, rowptr, nrowptr); + for (i=0; irowptr == NULL) + gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n"); + + cand = gk_fkvmalloc(ncols, "gk_csr_LowFilter: cand"); + + nrowptr[0] = 0; + for (nnz=0, i=0; inrows = mat->nrows; + nmat->ncols = mat->ncols; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + nrowptr = 
nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ZScoreFilter: nrowptr"); + nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowind"); + nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowval"); + + + switch (what) { + case GK_CSR_COL: + gk_errexit(SIGERR, "This has not been implemented yet.\n"); + break; + + case GK_CSR_ROW: + if (mat->rowptr == NULL) + gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n"); + + nrowptr[0] = 0; + for (nnz=0, i=0; i avgwgt) { + nrowind[nnz] = rowind[j]; + nrowval[nnz] = rowval[j]; + nnz++; + } + } + nrowptr[i+1] = nnz; + } + break; + + default: + gk_csr_Free(&nmat); + gk_errexit(SIGERR, "Unknown prunning type of %d\n", what); + return NULL; + } + + return nmat; +} + + +/*************************************************************************/ +/*! Compacts the column-space of the matrix by removing empty columns. + As a result of the compaction, the column numbers are renumbered. + The compaction operation is done in place and only affects the row-based + representation of the matrix. + The new columns are ordered in decreasing frequency. + + \param mat the matrix whose empty columns will be removed. +*/ +/**************************************************************************/ +void gk_csr_CompactColumns(gk_csr_t *mat) +{ + ssize_t i; + int nrows, ncols, nncols; + ssize_t *rowptr; + int *rowind, *colmap; + gk_ikv_t *clens; + + nrows = mat->nrows; + ncols = mat->ncols; + rowptr = mat->rowptr; + rowind = mat->rowind; + + colmap = gk_imalloc(ncols, "gk_csr_CompactColumns: colmap"); + + clens = gk_ikvmalloc(ncols, "gk_csr_CompactColumns: clens"); + for (i=0; i 0) + colmap[clens[i].val] = nncols++; + else + break; + } + + for (i=0; incols = nncols; + + gk_free((void **)&colmap, &clens, LTERM); +} + + +/*************************************************************************/ +/*! 
Sorts the indices in increasing order + \param mat the matrix itself, + \param what is either GK_CSR_ROW or GK_CSR_COL indicating which set of + indices to sort. +*/ +/**************************************************************************/ +void gk_csr_SortIndices(gk_csr_t *mat, int what) +{ + int n, nn=0; + ssize_t *ptr; + int *ind; + float *val; + + switch (what) { + case GK_CSR_ROW: + if (!mat->rowptr) + gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n"); + + n = mat->nrows; + ptr = mat->rowptr; + ind = mat->rowind; + val = mat->rowval; + break; + + case GK_CSR_COL: + if (!mat->colptr) + gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n"); + + n = mat->ncols; + ptr = mat->colptr; + ind = mat->colind; + val = mat->colval; + break; + + default: + gk_errexit(SIGERR, "Invalid index type of %d.\n", what); + return; + } + + #pragma omp parallel if (n > 100) + { + ssize_t i, j, k; + gk_ikv_t *cand; + float *tval; + + #pragma omp single + for (i=0; i ptr[i] && ind[j] < ind[j-1]) + k = 1; /* an inversion */ + cand[j-ptr[i]].val = j-ptr[i]; + cand[j-ptr[i]].key = ind[j]; + tval[j-ptr[i]] = val[j]; + } + if (k) { + gk_ikvsorti(ptr[i+1]-ptr[i], cand); + for (j=ptr[i]; jnrows; + fptr = mat->rowptr; + find = mat->rowind; + fval = mat->rowval; + + if (mat->colptr) gk_free((void **)&mat->colptr, LTERM); + if (mat->colind) gk_free((void **)&mat->colind, LTERM); + if (mat->colval) gk_free((void **)&mat->colval, LTERM); + + nr = mat->ncols; + rptr = mat->colptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr"); + rind = mat->colind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind"); + rval = mat->colval = (fval ? 
gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL); + break; + case GK_CSR_ROW: + nf = mat->ncols; + fptr = mat->colptr; + find = mat->colind; + fval = mat->colval; + + if (mat->rowptr) gk_free((void **)&mat->rowptr, LTERM); + if (mat->rowind) gk_free((void **)&mat->rowind, LTERM); + if (mat->rowval) gk_free((void **)&mat->rowval, LTERM); + + nr = mat->nrows; + rptr = mat->rowptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr"); + rind = mat->rowind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind"); + rval = mat->rowval = (fval ? gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL); + break; + default: + gk_errexit(SIGERR, "Invalid index type of %d.\n", what); + return; + } + + + for (i=0; i 6*nr) { + for (i=0; irowval) { + n = mat->nrows; + ptr = mat->rowptr; + val = mat->rowval; + + #pragma omp parallel for if (ptr[n] > OMPMINOPS) private(j,sum) schedule(static) + for (i=0; i 0 */ + if (sum > 0) + sum = 1.0/sum; + } + else if (norm == 2) { + for (j=ptr[i]; j 0) + sum = 1.0/sqrt(sum); + } + for (j=ptr[i]; jcolval) { + n = mat->ncols; + ptr = mat->colptr; + val = mat->colval; + + #pragma omp parallel for if (ptr[n] > OMPMINOPS) private(j,sum) schedule(static) + for (i=0; i 0 */ + if (sum > 0) + sum = 1.0/sum; + } + else if (norm == 2) { + for (j=ptr[i]; j 0) + sum = 1.0/sqrt(sum); + } + for (j=ptr[i]; jnrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + switch (type) { + case GK_CSR_MAXTF: /* TF' = .5 + .5*TF/MAX(TF) */ + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j, maxtf) schedule(static) + for (i=0; i OMPMINOPS) private(j, maxtf) schedule(static) + for (i=0; i OMPMINOPS) private(j) schedule(static) + for (i=0; i OMPMINOPS) private(j) schedule(static) + for (i=0; i OMPMINOPS) private(j) schedule(static) + for (i=0; i OMPMINOPS) private(j) schedule(static) + for (i=0; i OMPMINOPS) private(j) schedule(static) + for (i=0; i OMPMINOPS) schedule(static,32) + for (i=0; i0.0 ? 
log(rowval[i]) : -log(-rowval[i]))*logscale; + } +#ifdef XXX + #pragma omp parallel for private(j) schedule(static) + for (i=0; i0.0 ? log(rowval[j]) : -log(-rowval[j]))*logscale; + //rowval[j] = 1+sign(rowval[j], log(fabs(rowval[j]))*logscale); + } + } +#endif + break; + + case GK_CSR_IDF: /* TF' = TF*IDF */ + ncols = mat->ncols; + cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale"); + collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen"); + + for (i=0; i OMPMINOPS) schedule(static) + for (i=0; i 0 ? log(1.0*nrows/collen[i]) : 0.0); + + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static) + for (i=0; incols; + cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale"); + collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen"); + + for (i=0; i OMPMINOPS) schedule(static) reduction(+:nnzcols) + for (i=0; i 0 ? 1 : 0); + + bgfreq = gk_max(10, (ssize_t)(.5*rowptr[nrows]/nnzcols)); + printf("nnz: %zd, nnzcols: %d, bgfreq: %d\n", rowptr[nrows], nnzcols, bgfreq); + + #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static) + for (i=0; i 0 ? 
log(1.0*(nrows+2*bgfreq)/(bgfreq+collen[i])) : 0.0); + + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static) + for (i=0; inrows; + ptr = mat->rowptr; + val = mat->rowval; + + if (mat->rsums) + gk_free((void **)&mat->rsums, LTERM); + + sums = mat->rsums = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: sums"); + break; + case GK_CSR_COL: + n = mat->ncols; + ptr = mat->colptr; + val = mat->colval; + + if (mat->csums) + gk_free((void **)&mat->csums, LTERM); + + sums = mat->csums = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: sums"); + break; + default: + gk_errexit(SIGERR, "Invalid sum type of %d.\n", what); + return; + } + + if (val) { + #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static) + for (i=0; i OMPMINOPS) schedule(static) + for (i=0; inrows; + ptr = mat->rowptr; + val = mat->rowval; + + if (mat->rnorms) gk_free((void **)&mat->rnorms, LTERM); + + norms = mat->rnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms"); + break; + case GK_CSR_COL: + n = mat->ncols; + ptr = mat->colptr; + val = mat->colval; + + if (mat->cnorms) gk_free((void **)&mat->cnorms, LTERM); + + norms = mat->cnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms"); + break; + default: + gk_errexit(SIGERR, "Invalid norm type of %d.\n", what); + return; + } + + if (val) { + #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static) + for (i=0; i OMPMINOPS) schedule(static) + for (i=0; inrows; + ptr = mat->rowptr; + val = mat->rowval; + + if (mat->rnorms) gk_free((void **)&mat->rnorms, LTERM); + + norms = mat->rnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms"); + break; + case GK_CSR_COL: + n = mat->ncols; + ptr = mat->colptr; + val = mat->colval; + + if (mat->cnorms) gk_free((void **)&mat->cnorms, LTERM); + + norms = mat->cnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms"); + break; + default: + gk_errexit(SIGERR, "Invalid norm type of %d.\n", what); + return; + } + + if (val) { + #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static) + 
for (i=0; i OMPMINOPS) schedule(static) + for (i=0; inrows != mat->ncols) + gk_errexit(SIGERR, "The matrix is not square for a symmetric rowcol shuffling.\n"); + + nrows = mat->nrows; + ncols = mat->ncols; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + rperm = gk_imalloc(nrows, "gk_csr_Shuffle: rperm"); + cperm = gk_imalloc(ncols, "gk_csr_Shuffle: cperm"); + + switch (what) { + case GK_CSR_ROW: + gk_RandomPermute(nrows, rperm, 1); + for (i=0; i<20; i++) + gk_RandomPermute(nrows, rperm, 0); + + for (i=0; inrows = nrows; + nmat->ncols = ncols; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Shuffle: nrowptr"); + nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Shuffle: nrowind"); + nrowval = nmat->rowval = (rowval ? gk_fmalloc(rowptr[nrows], "gk_csr_Shuffle: nrowval") : NULL) ; + + for (i=0; icolptr; + colind = mat->colind; + colval = mat->colval; + + mat->colptr = NULL; + mat->colind = NULL; + mat->colval = NULL; + + gk_csr_CreateIndex(mat, GK_CSR_COL); + + nmat = gk_csr_Create(); + nmat->nrows = mat->ncols; + nmat->ncols = mat->nrows; + nmat->rowptr = mat->colptr; + nmat->rowind = mat->colind; + nmat->rowval = mat->colval; + + mat->colptr = colptr; + mat->colind = colind; + mat->colval = colval; + + return nmat; + +} + + +/*************************************************************************/ +/*! Computes the similarity between two rows/columns + + \param mat the matrix itself. The routine assumes that the indices + are sorted in increasing order. + \param i1 is the first row/column, + \param i2 is the second row/column, + \param what is either GK_CSR_ROW or GK_CSR_COL indicating the type of + objects between the similarity will be computed, + \param simtype is the type of similarity and is one of GK_CSR_COS, + GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN + \returns the similarity between the two rows/columns. 
+*/ +/**************************************************************************/ +float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, + int simtype) +{ + int nind1, nind2; + int *ind1, *ind2; + float *val1, *val2, stat1, stat2, sim; + + switch (what) { + case GK_CSR_ROW: + if (!mat->rowptr) + gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n"); + nind1 = mat->rowptr[i1+1]-mat->rowptr[i1]; + nind2 = mat->rowptr[i2+1]-mat->rowptr[i2]; + ind1 = mat->rowind + mat->rowptr[i1]; + ind2 = mat->rowind + mat->rowptr[i2]; + val1 = mat->rowval + mat->rowptr[i1]; + val2 = mat->rowval + mat->rowptr[i2]; + break; + + case GK_CSR_COL: + if (!mat->colptr) + gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n"); + nind1 = mat->colptr[i1+1]-mat->colptr[i1]; + nind2 = mat->colptr[i2+1]-mat->colptr[i2]; + ind1 = mat->colind + mat->colptr[i1]; + ind2 = mat->colind + mat->colptr[i2]; + val1 = mat->colval + mat->colptr[i1]; + val2 = mat->colval + mat->colptr[i2]; + break; + + default: + gk_errexit(SIGERR, "Invalid index type of %d.\n", what); + return 0.0; + } + + + switch (simtype) { + case GK_CSR_COS: + case GK_CSR_JAC: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1 ind2[i2]) { + stat2 += val2[i2]*val2[i2]; + i2++; + } + else { + sim += val1[i1]*val2[i2]; + stat1 += val1[i1]*val1[i1]; + stat2 += val2[i2]*val2[i2]; + i1++; + i2++; + } + } + if (simtype == GK_CSR_COS) + sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0); + else + sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0); + break; + + case GK_CSR_MIN: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1 ind2[i2]) { + stat2 += val2[i2]; + i2++; + } + else { + sim += gk_min(val1[i1],val2[i2]); + stat1 += val1[i1]; + stat2 += val2[i2]; + i1++; + i2++; + } + } + sim = (stat1+stat2-sim > 0.0 ? 
sim/(stat1+stat2-sim) : 0.0); + + break; + + case GK_CSR_AMIN: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1 ind2[i2]) { + stat2 += val2[i2]; + i2++; + } + else { + sim += gk_min(val1[i1],val2[i2]); + stat1 += val1[i1]; + stat2 += val2[i2]; + i1++; + i2++; + } + } + sim = (stat1 > 0.0 ? sim/stat1 : 0.0); + + break; + + default: + gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype); + return -1; + } + + return sim; + +} + + +/*************************************************************************/ +/*! Computes the similarity between two rows/columns + + \param mat_a the first matrix. The routine assumes that the indices + are sorted in increasing order. + \param mat_b the second matrix. The routine assumes that the indices + are sorted in increasing order. + \param i1 is the row/column from the first matrix (mat_a), + \param i2 is the row/column from the second matrix (mat_b), + \param what is either GK_CSR_ROW or GK_CSR_COL indicating the type of + objects between the similarity will be computed, + \param simtype is the type of similarity and is one of GK_CSR_COS, + GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN + \returns the similarity between the two rows/columns. 
+*/ +/**************************************************************************/ +float gk_csr_ComputePairSimilarity(gk_csr_t *mat_a, gk_csr_t *mat_b, + int i1, int i2, int what, int simtype) +{ + int nind1, nind2; + int *ind1, *ind2; + float *val1, *val2, stat1, stat2, sim; + + switch (what) { + case GK_CSR_ROW: + if (!mat_a->rowptr || !mat_b->rowptr) + gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n"); + nind1 = mat_a->rowptr[i1+1]-mat_a->rowptr[i1]; + nind2 = mat_b->rowptr[i2+1]-mat_b->rowptr[i2]; + ind1 = mat_a->rowind + mat_a->rowptr[i1]; + ind2 = mat_b->rowind + mat_b->rowptr[i2]; + val1 = mat_a->rowval + mat_a->rowptr[i1]; + val2 = mat_b->rowval + mat_b->rowptr[i2]; + break; + + case GK_CSR_COL: + if (!mat_a->colptr || !mat_b->colptr) + gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n"); + nind1 = mat_a->colptr[i1+1]-mat_a->colptr[i1]; + nind2 = mat_b->colptr[i2+1]-mat_b->colptr[i2]; + ind1 = mat_a->colind + mat_a->colptr[i1]; + ind2 = mat_b->colind + mat_b->colptr[i2]; + val1 = mat_a->colval + mat_a->colptr[i1]; + val2 = mat_b->colval + mat_b->colptr[i2]; + break; + + default: + gk_errexit(SIGERR, "Invalid index type of %d.\n", what); + return 0.0; + } + + + switch (simtype) { + case GK_CSR_COS: + case GK_CSR_JAC: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1 ind2[i2]) { + stat2 += val2[i2]*val2[i2]; + i2++; + } + else { + sim += val1[i1]*val2[i2]; + stat1 += val1[i1]*val1[i1]; + stat2 += val2[i2]*val2[i2]; + i1++; + i2++; + } + } + if (simtype == GK_CSR_COS) + sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0); + else + sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0); + break; + + case GK_CSR_MIN: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1 ind2[i2]) { + stat2 += val2[i2]; + i2++; + } + else { + sim += gk_min(val1[i1],val2[i2]); + stat1 += val1[i1]; + stat2 += val2[i2]; + i1++; + i2++; + } + } + sim = (stat1+stat2-sim > 0.0 ? 
sim/(stat1+stat2-sim) : 0.0); + + break; + + case GK_CSR_AMIN: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1 ind2[i2]) { + stat2 += val2[i2]; + i2++; + } + else { + sim += gk_min(val1[i1],val2[i2]); + stat1 += val1[i1]; + stat2 += val2[i2]; + i1++; + i2++; + } + } + sim = (stat1 > 0.0 ? sim/stat1 : 0.0); + + break; + + default: + gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype); + return -1; + } + + return sim; + +} + +/*************************************************************************/ +/*! Finds the n most similar rows (neighbors) to the query. + + \param mat the matrix itself + \param nqterms is the number of columns in the query + \param qind is the list of query columns + \param qval is the list of correspodning query weights + \param simtype is the type of similarity and is one of GK_CSR_DOTP, + GK_CSR_COS, GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN. In case of + GK_CSR_COS, the rows and the query are assumed to be of unit + length. + \param nsim is the maximum number of requested most similar rows. + If -1 is provided, then everything is returned unsorted. + \param minsim is the minimum similarity of the requested most + similar rows + \param hits is the result set. This array should be at least + of length nsim. + \param i_marker is an array of size equal to the number of rows + whose values are initialized to -1. If NULL is provided + then this array is allocated and freed internally. + \param i_cand is an array of size equal to the number of rows. + If NULL is provided then this array is allocated and freed + internally. + \returns The number of identified most similar rows, which can be + smaller than the requested number of nnbrs in those cases + in which there are no sufficiently many neighbors. 
+*/ +/**************************************************************************/ +int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, + float *qval, int simtype, int nsim, float minsim, gk_fkv_t *hits, + int *i_marker, gk_fkv_t *i_cand) +{ + ssize_t i, ii, j, k; + int nrows, ncols, ncand; + ssize_t *colptr; + int *colind, *marker; + float *colval, *rnorms, mynorm, *rsums, mysum; + gk_fkv_t *cand; + + if (nqterms == 0) + return 0; + + nrows = mat->nrows; + ncols = mat->ncols; + GKASSERT((colptr = mat->colptr) != NULL); + GKASSERT((colind = mat->colind) != NULL); + GKASSERT((colval = mat->colval) != NULL); + + marker = (i_marker ? i_marker : gk_ismalloc(nrows, -1, "gk_csr_SimilarRows: marker")); + cand = (i_cand ? i_cand : gk_fkvmalloc(nrows, "gk_csr_SimilarRows: cand")); + + switch (simtype) { + case GK_CSR_DOTP: + case GK_CSR_COS: + for (ncand=0, ii=0; iirnorms) != NULL); + mynorm = gk_fdot(nqterms, qval, 1, qval, 1); + + for (i=0; irsums) != NULL); + mysum = gk_fsum(nqterms, qval, 1); + + for (i=0; i= minsim) + cand[j++] = cand[i]; + } + ncand = j; + + if (nsim == -1 || nsim >= ncand) { + nsim = ncand; + } + else { + nsim = gk_min(nsim, ncand); + gk_dfkvkselect(ncand, nsim, cand); + gk_fkvsortd(nsim, cand); + } + + gk_fkvcopy(nsim, cand, hits); + + if (i_marker == NULL) + gk_free((void **)&marker, LTERM); + if (i_cand == NULL) + gk_free((void **)&cand, LTERM); + + return nsim; +} + + +/*************************************************************************/ +/*! Returns a symmetric version of a square matrix. The symmetric version + is constructed by applying an A op A^T operation, where op is one of + GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, GK_CSR_SYM_AVG. + + \param mat the matrix to be symmetrized, + \param op indicates the operation to be performed. The possible values are + GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, and GK_CSR_SYM_AVG. + + \returns the symmetrized matrix consisting only of its row-based structure. 
+ The input matrix is not modified. +*/ +/**************************************************************************/ +gk_csr_t *gk_csr_MakeSymmetric(gk_csr_t *mat, int op) +{ + ssize_t i, j, k, nnz; + int nrows, nadj, hasvals; + ssize_t *rowptr, *colptr, *nrowptr; + int *rowind, *colind, *nrowind, *marker, *ids; + float *rowval=NULL, *colval=NULL, *nrowval=NULL, *wgts=NULL; + gk_csr_t *nmat; + + if (mat->nrows != mat->ncols) { + fprintf(stderr, "gk_csr_MakeSymmetric: The matrix needs to be square.\n"); + return NULL; + } + + hasvals = (mat->rowval != NULL); + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + if (hasvals) + rowval = mat->rowval; + + /* create the column view for efficient processing */ + colptr = gk_zsmalloc(nrows+1, 0, "colptr"); + colind = gk_i32malloc(rowptr[nrows], "colind"); + if (hasvals) + colval = gk_fmalloc(rowptr[nrows], "colval"); + + for (i=0; inrows = mat->nrows; + nmat->ncols = mat->ncols; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_MakeSymmetric: nrowptr"); + nrowind = nmat->rowind = gk_imalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowind"); + if (hasvals) + nrowval = nmat->rowval = gk_fmalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval"); + + marker = gk_ismalloc(nrows, -1, "marker"); + ids = gk_imalloc(nrows, "ids"); + if (hasvals) + wgts = gk_fmalloc(nrows, "wgts"); + + nrowptr[0] = nnz = 0; + for (i=0; inrows+1. + \param cind is the indices structure of the CSR representation of + the components. The length of this vector must be mat->nrows. + \param cids is an array that stores the component # of each vertex + of the graph. The length of this vector must be mat->nrows. + + \returns the number of components that it found. + + \note The cptr, cind, and cids parameters can be NULL, in which case + only the number of connected components is returned. 
+*/ +/*************************************************************************/ +int gk_csr_FindConnectedComponents(gk_csr_t *mat, int32_t *cptr, int32_t *cind, + int32_t *cids) +{ + ssize_t i, ii, j, jj, k, nvtxs, first, last, ntodo, ncmps; + ssize_t *xadj; + int32_t *adjncy, *pos, *todo; + int32_t mustfree_ccsr=0, mustfree_where=0; + + if (mat->nrows != mat->ncols) { + fprintf(stderr, "gk_csr_FindComponents: The matrix needs to be square.\n"); + return -1; + } + + nvtxs = mat->nrows; + xadj = mat->rowptr; + adjncy = mat->rowind; + + /* Deal with NULL supplied cptr/cind vectors */ + if (cptr == NULL) { + cptr = gk_i32malloc(nvtxs+1, "gk_csr_FindComponents: cptr"); + cind = gk_i32malloc(nvtxs, "gk_csr_FindComponents: cind"); + mustfree_ccsr = 1; + } + + /* The list of vertices that have not been touched yet. + The valid entries are from [0..ntodo). */ + todo = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_csr_FindComponents: todo")); + + /* For a vertex that has not been visited, pos[i] is the position in the + todo list that this vertex is stored. + If a vertex has been visited, pos[i] = -1. */ + pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_csr_FindComponents: pos")); + + + /* Find the connected componends */ + ncmps = -1; + ntodo = nvtxs; /* All vertices have not been visited */ + first = last = 0; /* Point to the first and last vertices that have been touched + but not explored. + These vertices are stored in cind[first]...cind[last-1]. 
*/ + + while (first < last || ntodo > 0) { + if (first == last) { /* Find another starting vertex */ + cptr[++ncmps] = first; /* Mark the end of the current CC */ + + /* put the first vertex in the todo list as the start of the new CC */ + ASSERT(pos[todo[0]] != -1); + cind[last++] = todo[0]; + + pos[todo[0]] = -1; + todo[0] = todo[--ntodo]; + pos[todo[0]] = 0; + } + + i = cind[first++]; /* Get the first visited but unexplored vertex */ + + for (j=xadj[i]; jnrows != mat->ncols) { + fprintf(stderr, "gk_csr_ReorderSymmetric: The matrix needs to be square.\n"); + return NULL; + } + + if (perm == NULL && iperm == NULL) + return NULL; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + nmat = gk_csr_Create(); + + nmat->nrows = nrows; + nmat->ncols = nrows; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ReorderSymmetric: rowptr"); + nrowind = nmat->rowind = gk_i32malloc(rowptr[nrows], "gk_csr_ReorderSymmetric: rowind"); + nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_ReorderSymmetric: rowval"); + + /* allocate memory for the different structures present in the matrix */ + if (mat->rlabels) + nmat->rlabels = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: rlabels"); + if (mat->rmap) + nmat->rmap = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: rmap"); + if (mat->rnorms) + nmat->rnorms = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rnorms"); + if (mat->rsums) + nmat->rsums = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rsums"); + if (mat->rsizes) + nmat->rsizes = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rsizes"); + if (mat->rvols) + nmat->rvols = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rvols"); + if (mat->rwgts) + nmat->rwgts = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rwgts"); + + if (mat->clabels) + nmat->clabels = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: clabels"); + if (mat->cmap) + nmat->cmap = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: cmap"); + if (mat->cnorms) + nmat->cnorms = 
gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cnorms"); + if (mat->csums) + nmat->csums = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: csums"); + if (mat->csizes) + nmat->csizes = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: csizes"); + if (mat->cvols) + nmat->cvols = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cvols"); + if (mat->cwgts) + nmat->cwgts = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cwgts"); + + + + /* create perm/iperm if not provided */ + if (perm == NULL) { + freeperm = 1; + perm = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: perm"); + for (i=0; irlabels) + nmat->rlabels[v] = mat->rlabels[u]; + if (mat->rmap) + nmat->rmap[v] = mat->rmap[u]; + if (mat->rnorms) + nmat->rnorms[v] = mat->rnorms[u]; + if (mat->rsums) + nmat->rsums[v] = mat->rsums[u]; + if (mat->rsizes) + nmat->rsizes[v] = mat->rsizes[u]; + if (mat->rvols) + nmat->rvols[v] = mat->rvols[u]; + if (mat->rwgts) + nmat->rwgts[v] = mat->rwgts[u]; + + if (mat->clabels) + nmat->clabels[v] = mat->clabels[u]; + if (mat->cmap) + nmat->cmap[v] = mat->cmap[u]; + if (mat->cnorms) + nmat->cnorms[v] = mat->cnorms[u]; + if (mat->csums) + nmat->csums[v] = mat->csums[u]; + if (mat->csizes) + nmat->csizes[v] = mat->csizes[u]; + if (mat->cvols) + nmat->cvols[v] = mat->cvols[u]; + if (mat->cwgts) + nmat->cwgts[v] = mat->cwgts[u]; + + nrowptr[v+1] = jj; + } + + + /* free memory */ + if (freeperm) + gk_free((void **)&perm, LTERM); + if (freeiperm) + gk_free((void **)&iperm, LTERM); + + return nmat; +} + + +/*************************************************************************/ +/*! This function computes a permutation of the rows/columns of a symmetric + matrix based on a breadth-first-traversal. It can be used for re-ordering + the matrix to reduce its bandwidth for better cache locality. + + \param[IN] mat is the matrix whose ordering to be computed. + \param[IN] maxdegree is the maximum number of nonzeros of the rows that + will participate in the BFS ordering. 
Rows with more nonzeros + will be put at the front of the ordering in decreasing degree + order. + \param[IN] v is the starting row of the BFS. A value of -1 indicates that + a randomly selected row will be used. + \param[OUT] perm[i] stores the ID of row i in the re-ordered matrix. + \param[OUT] iperm[i] stores the ID of the row that corresponds to + the ith vertex in the re-ordered matrix. + + \note The perm or iperm (but not both) can be NULL, at which point, + the corresponding arrays are not returned. Though the program + works fine when both are NULL, doing that is not smart. + The returned arrays should be freed with gk_free(). +*/ +/*************************************************************************/ +void gk_csr_ComputeBFSOrderingSymmetric(gk_csr_t *mat, int maxdegree, int v, + int32_t **r_perm, int32_t **r_iperm) +{ + int i, k, nrows, first, last; + ssize_t j, *rowptr; + int32_t *rowind, *cot, *pos; + + if (mat->nrows != mat->ncols) { + fprintf(stderr, "gk_csr_ComputeBFSOrderingSymmetric: The matrix needs to be square.\n"); + return; + } + if (maxdegree < mat->nrows && v != -1) { + fprintf(stderr, "gk_csr_ComputeBFSOrderingSymmetric: Since maxdegree node renumbering is requested the starting row should be -1.\n"); + return; + } + if (mat->nrows <= 0) + return; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + + /* This array will function like pos + touched of the CC method */ + pos = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBFSOrderingSymmetric: pos")); + + /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes. + Positions from [0...first) is the current iperm[] vector of the explored rows; + Positions from [first...last) is the OPEN list (i.e., visited rows); + Positions from [last...nrows) is the todo list. 
*/ + cot = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBFSOrderingSymmetric: cot")); + + first = last = 0; + + /* deal with maxdegree handling */ + if (maxdegree < nrows) { + last = nrows; + for (i=nrows-1; i>=0; i--) { + if (rowptr[i+1]-rowptr[i] < maxdegree) { + cot[--last] = i; + pos[i] = last; + } + else { + cot[first++] = i; + pos[i] = -1; + } + } + GKASSERT(first == last); + + if (last > 0) { /* reorder them in degree decreasing order */ + gk_ikv_t *cand = gk_ikvmalloc(first, "gk_csr_ComputeBFSOrderingSymmetric: cand"); + + for (i=0; inrows != mat->ncols) { + fprintf(stderr, "gk_csr_ComputeBestFOrderingSymmetric: The matrix needs to be square.\n"); + return; + } + if (mat->nrows <= 0) + return; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + + + /* the degree of the vertices in the closed list */ + degrees = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: degrees"); + + /* the weighted degree of the vertices in the closed list for type==3 */ + wdegrees = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: wdegrees"); + + /* the sum of differences for type==4 */ + sod = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: sod"); + + /* the encountering level of a vertex type==5 */ + level = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: level"); + + /* The open+todo list of vertices. + The vertices from [0..nopen] are the open vertices. + The vertices from [nopen..ntodo) are the todo vertices. + */ + ot = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBestFOrderingSymmetric: ot")); + + /* For a vertex that has not been explored, pos[i] is the position in the ot list. */ + pos = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBestFOrderingSymmetric: pos")); + + /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1. 
*/ + perm = gk_i32smalloc(nrows, -1, "gk_csr_ComputeBestFOrderingSymmetric: perm"); + + /* create the queue and put the starting vertex in it */ + queue = gk_i32pqCreate(nrows); + gk_i32pqInsert(queue, v, 1); + + /* put v at the front of the open list */ + pos[0] = ot[0] = v; + pos[v] = ot[v] = 0; + nopen = 1; + ntodo = nrows; + + /* start processing the nodes */ + for (i=0; i= nopen) + gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen); + + /* remove v from the open list and re-arrange the todo part of the list */ + ot[pos[v]] = ot[nopen-1]; + pos[ot[nopen-1]] = pos[v]; + if (ntodo > nopen) { + ot[nopen-1] = ot[ntodo-1]; + pos[ot[ntodo-1]] = nopen-1; + } + nopen--; + ntodo--; + + for (j=rowptr[v]; j + + +/* These are the jmp_buf for the graceful exit in case of severe errors. + Multiple buffers are defined to allow for recursive invokation. */ +#define MAX_JBUFS 128 +__thread int gk_cur_jbufs=-1; +__thread jmp_buf gk_jbufs[MAX_JBUFS]; +__thread jmp_buf gk_jbuf; + +typedef void (*gksighandler_t)(int); + +/* These are the holders of the old singal handlers for the trapped signals */ +static __thread gksighandler_t old_SIGMEM_handler; /* Custom signal */ +static __thread gksighandler_t old_SIGERR_handler; /* Custom signal */ +static __thread gksighandler_t old_SIGMEM_handlers[MAX_JBUFS]; /* Custom signal */ +static __thread gksighandler_t old_SIGERR_handlers[MAX_JBUFS]; /* Custom signal */ + +/* The following is used to control if the gk_errexit() will actually abort or not. + There is always a single copy of this variable */ +static int gk_exit_on_error = 1; + + +/*************************************************************************/ +/*! 
This function sets the gk_exit_on_error variable + */ +/*************************************************************************/ +void gk_set_exit_on_error(int value) +{ + gk_exit_on_error = value; +} + + + +/*************************************************************************/ +/*! This function prints an error message and exits + */ +/*************************************************************************/ +void errexit(char *f_str,...) +{ + va_list argp; + + va_start(argp, f_str); + vfprintf(stderr, f_str, argp); + va_end(argp); + + if (strlen(f_str) == 0 || f_str[strlen(f_str)-1] != '\n') + fprintf(stderr,"\n"); + fflush(stderr); + + if (gk_exit_on_error) + exit(-2); + + /* abort(); */ +} + + +/*************************************************************************/ +/*! This function prints an error message and raises a signum signal + */ +/*************************************************************************/ +void gk_errexit(int signum, char *f_str,...) +{ + va_list argp; + + va_start(argp, f_str); + vfprintf(stderr, f_str, argp); + va_end(argp); + + fprintf(stderr,"\n"); + fflush(stderr); + + if (gk_exit_on_error) + raise(signum); +} + + +/***************************************************************************/ +/*! This function sets a number of signal handlers and sets the return point + of a longjmp +*/ +/***************************************************************************/ +int gk_sigtrap() +{ + if (gk_cur_jbufs+1 >= MAX_JBUFS) + return 0; + + gk_cur_jbufs++; + + old_SIGMEM_handlers[gk_cur_jbufs] = signal(SIGMEM, gk_sigthrow); + old_SIGERR_handlers[gk_cur_jbufs] = signal(SIGERR, gk_sigthrow); + + return 1; +} + + +/***************************************************************************/ +/*! 
This function sets the handlers for the signals to their default handlers + */ +/***************************************************************************/ +int gk_siguntrap() +{ + if (gk_cur_jbufs == -1) + return 0; + + signal(SIGMEM, old_SIGMEM_handlers[gk_cur_jbufs]); + signal(SIGERR, old_SIGERR_handlers[gk_cur_jbufs]); + + gk_cur_jbufs--; + + return 1; +} + + +/*************************************************************************/ +/*! This function is the custome signal handler, which all it does is to + perform a longjump to the most recent saved environment + */ +/*************************************************************************/ +void gk_sigthrow(int signum) +{ + longjmp(gk_jbufs[gk_cur_jbufs], signum); +} + + +/*************************************************************************** +* This function sets a number of signal handlers and sets the return point +* of a longjmp +****************************************************************************/ +void gk_SetSignalHandlers() +{ + old_SIGMEM_handler = signal(SIGMEM, gk_NonLocalExit_Handler); + old_SIGERR_handler = signal(SIGERR, gk_NonLocalExit_Handler); +} + + +/*************************************************************************** +* This function sets the handlers for the signals to their default handlers +****************************************************************************/ +void gk_UnsetSignalHandlers() +{ + signal(SIGMEM, old_SIGMEM_handler); + signal(SIGERR, old_SIGERR_handler); +} + + +/************************************************************************* +* This function is the handler for SIGUSR1 that implements the cleaning up +* process prior to a non-local exit. +**************************************************************************/ +void gk_NonLocalExit_Handler(int signum) +{ + longjmp(gk_jbuf, signum); +} + + +/*************************************************************************/ +/*! 
\brief Thread-safe implementation of strerror() */ +/**************************************************************************/ +char *gk_strerror(int errnum) +{ +#if defined(WIN32) || defined(__MINGW32__) + return strerror(errnum); +#else +#ifndef SUNOS + static __thread char buf[1024]; + + strerror_r(errnum, buf, 1024); + + buf[1023] = '\0'; + return buf; +#else + return strerror(errnum); +#endif +#endif +} + + + +/************************************************************************* +* This function prints a backtrace of calling functions +**************************************************************************/ +void PrintBackTrace() +{ +#ifdef HAVE_EXECINFO_H + void *array[10]; + int i, size; + char **strings; + + size = backtrace(array, 10); + strings = backtrace_symbols(array, size); + + printf("Obtained %d stack frames.\n", size); + for (i=0; i + +/********************************************************************** + * This function computes the max accuracy score of a ranked list, + * given +1/-1 class list + **********************************************************************/ +float ComputeAccuracy(int n, gk_fkv_t *list) +{ + int i, P, N, TP, FN = 0; + float bAccuracy = 0.0; + float acc; + + for (P=0, i=0;i bAccuracy) + bAccuracy = acc; + } + + return bAccuracy; +} + + +/***************************************************************************** + * This function computes the ROC score of a ranked list, given a +1/-1 class + * list. + ******************************************************************************/ +float ComputeROCn(int n, int maxN, gk_fkv_t *list) +{ + int i, P, TP, FP, TPprev, FPprev, AUC; + float prev; + + FP = TP = FPprev = TPprev = AUC = 0; + prev = list[0].key -1; + + for (P=0, i=0; i 0 ? (float)(1.0*AUC/(P*FP)) : 0.0); +} + + +/***************************************************************************** +* This function computes the median rate of false positive for each positive +* instance. 
+******************************************************************************/ +float ComputeMedianRFP(int n, gk_fkv_t *list) +{ + int i, P, N, TP, FP; + + P = N = 0; + for (i=0; i + +/* Byte-wise swap two items of size SIZE. */ +#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0) + + +/******************************************************************************/ +/*! This function puts the 'topk' largest values in the beginning of the array */ +/*******************************************************************************/ +int gk_dfkvkselect(size_t n, int topk, gk_fkv_t *cand) +{ + int i, j, lo, hi, mid; + gk_fkv_t stmp; + float pivot; + + if (n <= topk) + return n; /* return if the array has fewer elements than we want */ + + for (lo=0, hi=n-1; lo < hi;) { + mid = lo + ((hi-lo) >> 1); + + /* select the median */ + if (cand[lo].key < cand[mid].key) + mid = lo; + if (cand[hi].key > cand[mid].key) + mid = hi; + else + goto jump_over; + if (cand[lo].key < cand[mid].key) + mid = lo; + +jump_over: + QSSWAP(cand[mid], cand[hi], stmp); + pivot = cand[hi].key; + + /* the partitioning algorithm */ + for (i=lo-1, j=lo; j= pivot) { + i++; + QSSWAP(cand[i], cand[j], stmp); + } + } + i++; + QSSWAP(cand[i], cand[hi], stmp); + + + if (i > topk) + hi = i-1; + else if (i < topk) + lo = i+1; + else + break; + } + +/* + if (cand[lo].key < cand[hi].key) + printf("Hmm Error: %d %d %d %f %f\n", i, lo, hi, cand[lo].key, cand[hi].key); + + + for (i=topk; i cand[j].key) + printf("Hmm Error: %d %d %f %f %d %d\n", i, j, cand[i].key, cand[j].key, lo, hi); + } +*/ + + return topk; +} + + +/******************************************************************************/ +/*! 
This function puts the 'topk' smallest values in the beginning of the array */ +/*******************************************************************************/ +int gk_ifkvkselect(size_t n, int topk, gk_fkv_t *cand) +{ + int i, j, lo, hi, mid; + gk_fkv_t stmp; + float pivot; + + if (n <= topk) + return n; /* return if the array has fewer elements than we want */ + + for (lo=0, hi=n-1; lo < hi;) { + mid = lo + ((hi-lo) >> 1); + + /* select the median */ + if (cand[lo].key > cand[mid].key) + mid = lo; + if (cand[hi].key < cand[mid].key) + mid = hi; + else + goto jump_over; + if (cand[lo].key > cand[mid].key) + mid = lo; + +jump_over: + QSSWAP(cand[mid], cand[hi], stmp); + pivot = cand[hi].key; + + /* the partitioning algorithm */ + for (i=lo-1, j=lo; j topk) + hi = i-1; + else if (i < topk) + lo = i+1; + else + break; + } + +/* + if (cand[lo].key > cand[hi].key) + printf("Hmm Error: %d %d %d %f %f\n", i, lo, hi, cand[lo].key, cand[hi].key); + + + for (i=topk; i + + + +/************************************************************************* +* This function checks if a file exists +**************************************************************************/ +int gk_fexists(char *fname) +{ + struct stat status; + + if (stat(fname, &status) == -1) + return 0; + + return S_ISREG(status.st_mode); +} + + +/************************************************************************* +* This function checks if a directory exists +**************************************************************************/ +int gk_dexists(char *dirname) +{ + struct stat status; + + if (stat(dirname, &status) == -1) + return 0; + + return S_ISDIR(status.st_mode); +} + + +/*************************************************************************/ +/*! \brief Returns the size of the file in bytes + +This function returns the size of a file as a 64 bit integer. If there +were any errors in stat'ing the file, -1 is returned. 
+\note That due to the -1 return code, the maximum file size is limited to + 63 bits (which I guess is okay for now). +*/ +/**************************************************************************/ +ssize_t gk_getfsize(char *filename) +{ + struct stat status; + + if (stat(filename, &status) == -1) + return -1; + + return (size_t)(status.st_size); +} + + +/*************************************************************************/ +/*! This function gets some basic statistics about the file. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. + \param r_ntokens is the number of tokens in the file. If it is NULL, + this information is not returned. + \param r_max_nlntokens is the maximum number of tokens in any line + in the file. If it is NULL this information is not returned. + \param r_nbytes is the number of bytes in the file. If it is NULL, + this information is not returned. +*/ +/*************************************************************************/ +void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, + size_t *r_max_nlntokens, size_t *r_nbytes) +{ + size_t nlines=0, ntokens=0, max_nlntokens=0, nbytes=0, oldntokens=0, nread; + int intoken=0; + char buffer[4097], *cptr; + FILE *fpin; + + fpin = gk_fopen(fname, "r", "gk_GetFileStats"); + + while (!feof(fpin)) { + nread = fread(buffer, sizeof(char), 4096, fpin); + nbytes += nread; + + buffer[nread] = '\0'; /* There is space for this one */ + for (cptr=buffer; *cptr!='\0'; cptr++) { + if (*cptr == '\n') { + nlines++; + ntokens += intoken; + intoken = 0; + if (max_nlntokens < ntokens-oldntokens) + max_nlntokens = ntokens-oldntokens; + oldntokens = ntokens; + } + else if (*cptr == ' ' || *cptr == '\t') { + ntokens += intoken; + intoken = 0; + } + else { + intoken = 1; + } + } + } + ntokens += intoken; + if (max_nlntokens < ntokens-oldntokens) + max_nlntokens = ntokens-oldntokens; + + gk_fclose(fpin); + 
+ if (r_nlines != NULL) + *r_nlines = nlines; + if (r_ntokens != NULL) + *r_ntokens = ntokens; + if (r_max_nlntokens != NULL) + *r_max_nlntokens = max_nlntokens; + if (r_nbytes != NULL) + *r_nbytes = nbytes; +} + + +/************************************************************************* +* This function takes in a potentially full path specification of a file +* and just returns a string containing just the basename of the file. +* The basename is derived from the actual filename by stripping the last +* .ext part. +**************************************************************************/ +char *gk_getbasename(char *path) +{ + char *startptr, *endptr; + char *basename; + + if ((startptr = strrchr(path, '/')) == NULL) + startptr = path; + else + startptr = startptr+1; + + basename = gk_strdup(startptr); + + if ((endptr = strrchr(basename, '.')) != NULL) + *endptr = '\0'; + + return basename; +} + +/************************************************************************* +* This function takes in a potentially full path specification of a file +* and just returns a string corresponding to its file extension. The +* extension of a file is considered to be the string right after the +* last '.' character. +**************************************************************************/ +char *gk_getextname(char *path) +{ + char *startptr; + + if ((startptr = strrchr(path, '.')) == NULL) + return gk_strdup(path); + else + return gk_strdup(startptr+1); +} + +/************************************************************************* +* This function takes in a potentially full path specification of a file +* and just returns a string containing just the filename. 
+**************************************************************************/ +char *gk_getfilename(char *path) +{ + char *startptr; + + if ((startptr = strrchr(path, '/')) == NULL) + return gk_strdup(path); + else + return gk_strdup(startptr+1); +} + +/************************************************************************* +* This function takes in a potentially full path specification of a file +* and extracts the directory path component if it exists, otherwise it +* returns "./" as the path. The memory for it is dynamically allocated. +**************************************************************************/ +char *getpathname(char *path) +{ + char *endptr, *tmp; + + if ((endptr = strrchr(path, '/')) == NULL) { + return gk_strdup("."); + } + else { + tmp = gk_strdup(path); + *(strrchr(tmp, '/')) = '\0'; + return tmp; + } +} + + + +/************************************************************************* +* This function creates a path +**************************************************************************/ +int gk_mkpath(char *pathname) +{ + char tmp[2048]; + + sprintf(tmp, "mkdir -p %s", pathname); + return system(tmp); +} + + +/************************************************************************* +* This function deletes a directory tree and all of its contents +**************************************************************************/ +int gk_rmpath(char *pathname) +{ + char tmp[2048]; + + sprintf(tmp, "rm -r %s", pathname); + return system(tmp); +} diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/getopt.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/getopt.c new file mode 100644 index 00000000..2e7e042e --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/getopt.c @@ -0,0 +1,855 @@ +/*************************************************************************/ +/*! \file getopt.c +\brief Command line parsing + +This file contains a implementation of GNU's Getopt facility. 
The purpose +for including it here is to ensure portability across different unix- and +windows-based systems. + +\warning +The implementation provided here uses the \c gk_ prefix for all variables +used by the standard Getopt facility to communicate with the program. +So, do read the documentation here. + +\verbatim + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. +\endverbatim +*/ +/*************************************************************************/ + + +#include + +/*************************************************************************/ +/* Local function prototypes */ +/*************************************************************************/ +static void exchange (char **); +static char *gk_getopt_initialize (int, char **, char *); +static int gk_getopt_internal(int argc, char **argv, char *optstring, + struct gk_option *longopts, int *longind, int long_only); + + + +/*************************************************************************/ +/*! \brief For communication arguments to the caller. + +This variable is set by getopt to point at the value of the option argument, +for those options that accept arguments. 
+*/ +/*************************************************************************/ +char *gk_optarg; + + +/*************************************************************************/ +/*! \brief Index in ARGV of the next element to be scanned. + +This variable is set by getopt to the index of the next element of the argv +array to be processed. Once getopt has found all of the option arguments, +you can use this variable to determine where the remaining non-option arguments +begin. +*/ +/*************************************************************************/ +int gk_optind = 1; + + +/*************************************************************************/ +/*! \brief Controls error reporting for unrecognized options. + +If the value of this variable is nonzero, then getopt prints an error +message to the standard error stream if it encounters an unknown option +character or an option with a missing required argument. This is the default +behavior. If you set this variable to zero, getopt does not print any messages, +but it still returns the character ? to indicate an error. +*/ +/*************************************************************************/ +int gk_opterr = 1; + + +/*************************************************************************/ +/*! \brief Stores unknown option characters + +When getopt encounters an unknown option character or an option with a +missing required argument, it stores that option character in this +variable. You can use this for providing your own diagnostic messages. +*/ +/*************************************************************************/ +int gk_optopt = '?'; + + +/*************************************************************************/ +/* +Records that the getopt facility has been initialized. 
+*/ +/*************************************************************************/ +int gk_getopt_initialized; + + +/*************************************************************************/ +/* +The next char to be scanned in the option-element in which the last option +character we returned was found. This allows us to pick up the scan where +we left off. + +If this is zero, or a null string, it means resume the scan by advancing +to the next ARGV-element. +*/ +/*************************************************************************/ +static char *nextchar; + + +/*************************************************************************/ +/* +Value of POSIXLY_CORRECT environment variable. +*/ +/*************************************************************************/ +static char *posixly_correct; + + +/*************************************************************************/ +/* +Describe how to deal with options that follow non-option ARGV-elements. + +If the caller did not specify anything, the default is REQUIRE_ORDER if +the environment variable POSIXLY_CORRECT is defined, PERMUTE otherwise. + +REQUIRE_ORDER means don't recognize them as options; stop option processing +when the first non-option is seen. This is what Unix does. This mode of +operation is selected by either setting the environment variable +POSIXLY_CORRECT, or using `+' as the first character of the list of +option characters. + +PERMUTE is the default. We permute the contents of ARGV as we scan, so +that eventually all the non-options are at the end. This allows options +to be given in any order, even with programs that were not written to +expect this. + +RETURN_IN_ORDER is an option available to programs that were written +to expect options and other ARGV-elements in any order and that care +about the ordering of the two. We describe each non-option ARGV-element +as if it were the argument of an option with character code 1. 
+Using `-' as the first character of the list of option characters +selects this mode of operation. + +The special argument `--' forces an end of option-scanning regardless +of the value of `ordering'. In the case of RETURN_IN_ORDER, only +`--' can cause `getopt' to return -1 with `gk_optind' != ARGC. +*/ +/*************************************************************************/ +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + + + +/*************************************************************************/ +/* +Describe the part of ARGV that contains non-options that have +been skipped. `first_nonopt' is the index in ARGV of the first of them; +`last_nonopt' is the index after the last of them. +*/ +/*************************************************************************/ +static int first_nonopt; +static int last_nonopt; + + + + + +/*************************************************************************/ +/* +Handle permutation of arguments. + +Exchange two adjacent subsequences of ARGV. +One subsequence is elements [first_nonopt,last_nonopt) +which contains all the non-options that have been skipped so far. +The other is elements [last_nonopt,gk_optind), which contains all +the options processed since those non-options were skipped. + +`first_nonopt' and `last_nonopt' are relocated so that they describe +the new indices of the non-options in ARGV after they are moved. +*/ +/*************************************************************************/ +static void exchange (char **argv) +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = gk_optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. 
*/ + + while (top > middle && middle > bottom) { + if (top - middle > middle - bottom) { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (gk_optind - last_nonopt); + last_nonopt = gk_optind; +} + + + +/*************************************************************************/ +/* +Initialize the internal data when the first call is made. +*/ +/*************************************************************************/ +static char *gk_getopt_initialize (int argc, char **argv, char *optstring) +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = gk_optind; + + nextchar = NULL; + + posixly_correct = getenv("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. 
*/ + if (optstring[0] == '-') { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + + return optstring; +} + + +/*************************************************************************/ +/* + Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `gk_optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `gk_optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `gk_opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `gk_optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `gk_optarg', otherwise `gk_optarg' is set to zero. 


   If OPTSTRING starts with `-' or `+', it requests different methods of
   handling the non-option ARGV-elements.
   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.

   Long-named options begin with `--' instead of `-'.
   Their names may be abbreviated as long as the abbreviation is unique
   or is an exact match for some defined option.  If they have an
   argument, it follows the option name in the same ARGV-element, separated
   from the option name by a `=', or else the in next ARGV-element.
   When `getopt' finds a long-named option, it returns 0 if that option's
   `flag' field is nonzero, the value of the option's `val' field
   if the `flag' field is zero.

   LONGOPTS is a vector of `struct gk_option' terminated by an
   element containing a name which is zero.

   LONGIND returns the index in LONGOPT of the long-named option found.
   It is only valid when a long-named option has been found by the most
   recent call.

   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
   long-named options.  
*/
/*************************************************************************/
static int gk_getopt_internal(int argc, char **argv, char *optstring, 
        struct gk_option *longopts, int *longind, int long_only)
{
  /* A leading ':' in OPTSTRING silences the diagnostics regardless of
     gk_opterr.
     NOTE(review): this test runs before any leading `-'/`+' is skipped,
     and that prefix is skipped only on the call that runs
     gk_getopt_initialize(); on later calls `optstring' still starts with
     the prefix character. Confirm this is intended. */
  int print_errors = gk_opterr;
  if (optstring[0] == ':')
    print_errors = 0;

  if (argc < 1)
    return -1;

  gk_optarg = NULL;

  /* (Re)initialize on the first call, or when the caller reset gk_optind
     to 0 to request a fresh scan. */
  if (gk_optind == 0 || !gk_getopt_initialized) {
    if (gk_optind == 0)
      gk_optind = 1;	/* Don't scan ARGV[0], the program name.  */

    optstring = gk_getopt_initialize (argc, argv, optstring);
    gk_getopt_initialized = 1;
  }

  /* Test whether ARGV[gk_optind] points to a non-option argument.
     Either it does not have option syntax, or there is an environment flag
     from the shell indicating it is not an option.  The later information
     is only used when the used in the GNU libc.  */
# define NONOPTION_P (argv[gk_optind][0] != '-' || argv[gk_optind][1] == '\0')

  if (nextchar == NULL || *nextchar == '\0') {
    /* Advance to the next ARGV-element.  */

    /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
       moved back by the user (who may also have changed the arguments).  */
    if (last_nonopt > gk_optind)
      last_nonopt = gk_optind;
    if (first_nonopt > gk_optind)
      first_nonopt = gk_optind;

    if (ordering == PERMUTE) {
      /* If we have just processed some options following some non-options,
         exchange them so that the options come first.  */

      if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
        exchange ((char **) argv);
      else if (last_nonopt != gk_optind)
        first_nonopt = gk_optind;

      /* Skip any additional non-options
         and extend the range of non-options previously skipped.  */

      while (gk_optind < argc && NONOPTION_P)
        gk_optind++;

      last_nonopt = gk_optind;
    }

    /* The special ARGV-element `--' means premature end of options.
       Skip it like a null option,
       then exchange with previous non-options as if it were an option,
       then skip everything else like a non-option.  */

    if (gk_optind != argc && !strcmp (argv[gk_optind], "--")) {
      gk_optind++;

      if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
        exchange ((char **) argv);
      else if (first_nonopt == last_nonopt)
        first_nonopt = gk_optind;
      last_nonopt = argc;

      gk_optind = argc;
    }

    /* If we have done all the ARGV-elements, stop the scan
       and back over any non-options that we skipped and permuted.  */

    if (gk_optind == argc) {
      /* Set the next-arg-index to point at the non-options
         that we previously skipped, so the caller will digest them.  */
      if (first_nonopt != last_nonopt)
        gk_optind = first_nonopt;
      return -1;
    }

    /* If we have come to a non-option and did not permute it,
       either stop the scan or describe it to the caller and pass it by.  */

    if (NONOPTION_P) {
      if (ordering == REQUIRE_ORDER)
        return -1;
      /* otherwise hand the non-option back as the argument of option code 1 */
      gk_optarg = argv[gk_optind++];
      return 1;
    }

    /* We have found another option-ARGV-element.
       Skip the initial punctuation.  */

    /* one '-' always; a second one only when long options are in use */
    nextchar = (argv[gk_optind] + 1 + (longopts != NULL && argv[gk_optind][1] == '-'));
  }

  /* Decode the current option-ARGV-element.  */

  /* Check whether the ARGV-element is a long option.

     If long_only and the ARGV-element has the form "-f", where f is
     a valid short option, don't consider it an abbreviated form of
     a long option that starts with f.  Otherwise there would be no
     way to give the -f short option.

     On the other hand, if there's a long option "fubar" and
     the ARGV-element is "-fu", do consider that an abbreviation of
     the long option, just like "--fu", and not "-f" with arg "u".

     This distinction seems to be the most useful approach.  */

  if (longopts != NULL && (argv[gk_optind][1] == '-' || (long_only && (argv[gk_optind][2] || !strchr(optstring, argv[gk_optind][1]))))) {
    char *nameend;
    struct gk_option *p;
    struct gk_option *pfound = NULL;
    int exact = 0;
    int ambig = 0;
    int indfound = -1;
    int option_index;

    /* the option name ends at '=' or at the end of the ARGV-element */
    for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
      /* Do nothing.  */ ;

    /* Test all long options for either exact match or abbreviated matches.  */
    for (p = longopts, option_index = 0; p->name; p++, option_index++) {
      if (!strncmp (p->name, nextchar, nameend - nextchar)) {
        if ((unsigned int) (nameend - nextchar) == (unsigned int) strlen (p->name)) {
          /* Exact match found.  */
          pfound = p;
          indfound = option_index;
          exact = 1;
          break;
        }
        else if (pfound == NULL) {
          /* First nonexact match found.  */
          pfound = p;
          indfound = option_index;
        }
        else if (long_only || pfound->has_arg != p->has_arg || pfound->flag != p->flag || pfound->val != p->val)
          /* Second or later nonexact match found.  */
          ambig = 1;
      }
    }

    if (ambig && !exact) {
      if (print_errors)
        fprintf(stderr, "%s: option `%s' is ambiguous\n", argv[0], argv[gk_optind]);

      /* discard the rest of this ARGV-element */
      nextchar += strlen (nextchar);
      gk_optind++;
      gk_optopt = 0;
      return '?';
    }

    if (pfound != NULL) {
      option_index = indfound;
      gk_optind++;
      if (*nameend) {
        /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums.  */
        if (pfound->has_arg)
          gk_optarg = nameend + 1;
        else {
          if (print_errors) {
            if (argv[gk_optind - 1][1] == '-')
              /* --option */
              fprintf(stderr, "%s: option `--%s' doesn't allow an argument\n", argv[0], pfound->name);
            else
              /* +option or -option */
              fprintf(stderr, "%s: option `%c%s' doesn't allow an argument\n", argv[0], argv[gk_optind - 1][0], pfound->name);
          }

          nextchar += strlen (nextchar);

          gk_optopt = pfound->val;
          return '?';
        }
      }
      else if (pfound->has_arg == 1) {
        /* required argument given as the next ARGV-element */
        if (gk_optind < argc)
          gk_optarg = argv[gk_optind++];
        else {
          if (print_errors)
            fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
          nextchar += strlen (nextchar);
          gk_optopt = pfound->val;
          return optstring[0] == ':' ? ':' : '?';
        }
      }
      nextchar += strlen (nextchar);
      if (longind != NULL)
        *longind = option_index;
      /* a non-NULL flag receives val and 0 is returned; otherwise val is returned */
      if (pfound->flag) {
        *(pfound->flag) = pfound->val;
        return 0;
      }
      return pfound->val;
    }

    /* Can't find it as a long option.  If this is not getopt_long_only,
       or the option starts with '--' or is not a valid short
       option, then it's an error.  Otherwise interpret it as a short option.  */
    if (!long_only || argv[gk_optind][1] == '-' || strchr(optstring, *nextchar) == NULL) {
      if (print_errors) {
        if (argv[gk_optind][1] == '-')
          /* --option */
          fprintf(stderr, "%s: unrecognized option `--%s'\n", argv[0], nextchar);
        else
          /* +option or -option */
          fprintf(stderr, "%s: unrecognized option `%c%s'\n", argv[0], argv[gk_optind][0], nextchar);
      }
      nextchar = (char *) "";
      gk_optind++;
      gk_optopt = 0;
      return '?';
    }
  }

  /* Look at and handle the next short option-character.  */
  {
    char c = *nextchar++;
    char *temp = strchr(optstring, c);

    /* Increment `gk_optind' when we start to process its last character.  */
    if (*nextchar == '\0')
      ++gk_optind;

    /* ':' is never a valid option character, even though it occurs in OPTSTRING */
    if (temp == NULL || c == ':') {
      if (print_errors) {
        if (posixly_correct)
          /* 1003.2 specifies the format of this message.  */
          fprintf(stderr, "%s: illegal option -- %c\n", argv[0], c);
        else
          fprintf(stderr, "%s: invalid option -- %c\n", argv[0], c);
      }
      gk_optopt = c;
      return '?';
    }

    /* Convenience. Treat POSIX -W foo same as long option --foo */
    /* NOTE(review): unlike the long-option branch above, this branch does
       not test `longopts != NULL' before walking the table below; a "W;"
       entry in OPTSTRING combined with a NULL LONGOPTS would dereference
       a null pointer. Confirm callers never combine the two. */
    if (temp[0] == 'W' && temp[1] == ';') {
      char *nameend;
      struct gk_option *p;
      struct gk_option *pfound = NULL;
      int exact = 0;
      int ambig = 0;
      int indfound = 0;
      int option_index;

      /* This is an option that requires an argument.  */
      if (*nextchar != '\0') {
        gk_optarg = nextchar;
        /* If we end this ARGV-element by taking the rest as an arg,
           we must advance to the next element now.  */
        gk_optind++;
      }
      else if (gk_optind == argc) {
        if (print_errors) {
          /* 1003.2 specifies the format of this message.  */
          fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
        }
        gk_optopt = c;
        if (optstring[0] == ':')
          c = ':';
        else
          c = '?';
        return c;
      }
      else
        /* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument.  */
        gk_optarg = argv[gk_optind++];

      /* gk_optarg is now the argument, see if it's in the table of longopts.  */

      for (nextchar = nameend = gk_optarg; *nameend && *nameend != '='; nameend++)
        /* Do nothing.  */ ;

      /* Test all long options for either exact match or abbreviated matches.  */
      for (p = longopts, option_index = 0; p->name; p++, option_index++) {
        if (!strncmp (p->name, nextchar, nameend - nextchar)) {
          if ((unsigned int) (nameend - nextchar) == strlen (p->name)) {
            /* Exact match found.  */
            pfound = p;
            indfound = option_index;
            exact = 1;
            break;
          }
          else if (pfound == NULL) {
            /* First nonexact match found.  */
            pfound = p;
            indfound = option_index;
          }
          else
            /* Second or later nonexact match found.  */
            ambig = 1;
        }
      }
      if (ambig && !exact) {
        /* NOTE(review): gk_optind was already advanced past the -W argument
           above, so argv[gk_optind] here is the element after it (possibly
           argv[argc]) -- verify this is the string meant to be printed. */
        if (print_errors)
          fprintf(stderr, "%s: option `-W %s' is ambiguous\n", argv[0], argv[gk_optind]);
        nextchar += strlen (nextchar);
        gk_optind++;
        return '?';
      }
      if (pfound != NULL) {
        option_index = indfound;
        if (*nameend) {
          /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums.  */
          if (pfound->has_arg)
            gk_optarg = nameend + 1;
          else {
            if (print_errors)
              fprintf(stderr, "%s: option `-W %s' doesn't allow an argument\n", argv[0], pfound->name);

            nextchar += strlen (nextchar);
            return '?';
          }
        }
        else if (pfound->has_arg == 1) {
          if (gk_optind < argc)
            gk_optarg = argv[gk_optind++];
          else {
            if (print_errors)
              fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
            nextchar += strlen (nextchar);
            return optstring[0] == ':' ? ':' : '?';
          }
        }
        nextchar += strlen (nextchar);
        if (longind != NULL)
          *longind = option_index;
        if (pfound->flag) {
          *(pfound->flag) = pfound->val;
          return 0;
        }
        return pfound->val;
      }
      nextchar = NULL;
      return 'W';		/* Let the application handle it.   */
    }

    if (temp[1] == ':') {
      if (temp[2] == ':') {
        /* This is an option that accepts an argument optionally.  */
        /* only text in the same ARGV-element counts as the optional argument */
        if (*nextchar != '\0') {
          gk_optarg = nextchar;
          gk_optind++;
        }
        else
          gk_optarg = NULL;
        nextchar = NULL;
      }
      else {
        /* This is an option that requires an argument.  */
        if (*nextchar != '\0') {
          gk_optarg = nextchar;
          /* If we end this ARGV-element by taking the rest as an arg, we must advance to the next element now.  */
          gk_optind++;
        }
        else if (gk_optind == argc) {
          if (print_errors) {
            /* 1003.2 specifies the format of this message.  */
            fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
          }
          gk_optopt = c;
          /* the error code replaces the option character that is returned below */
          if (optstring[0] == ':')
            c = ':';
          else
            c = '?';
        }
        else
          /* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument.  */
          gk_optarg = argv[gk_optind++];
        nextchar = NULL;
      }
    }
    return c;
  }
}



/*************************************************************************/
/*! \brief Parse command-line arguments

The gk_getopt() function gets the next option argument from the argument 
list specified by the \c argv and \c argc arguments. Normally these values 
come directly from the arguments received by main(). 

\param argc is the number of command line arguments passed to main().
\param argv is an array of strings storing the above command line 
       arguments.
\param options is a string that specifies the option characters that 
       are valid for this program. An option character in this string 
       can be followed by a colon (`:') to indicate that it takes a 
       required argument. If an option character is followed by two 
       colons (`::'), its argument is optional; this is a GNU extension. 

\return 
It returns the option character for the next command line option. When no 
more option arguments are available, it returns -1. There may still be 
more non-option arguments; you must compare the external variable 
#gk_optind against the \c argc parameter to check this. 
+ +\return +If the option has an argument, gk_getopt() returns the argument by storing +it in the variable #gk_optarg. You don't ordinarily need to copy the +#gk_optarg string, since it is a pointer into the original \c argv array, +not into a static area that might be overwritten. + +\return +If gk_getopt() finds an option character in \c argv that was not included +in options, or a missing option argument, it returns `?' and sets the +external variable #gk_optopt to the actual option character. +If the first character of options is a colon (`:'), then gk_getopt() +returns `:' instead of `?' to indicate a missing option argument. +In addition, if the external variable #gk_opterr is nonzero (which is +the default), gk_getopt() prints an error message. The variable #gk_optarg is +set by gk_getopt() to point at the value of the option argument, +for those options that accept arguments. + + +gk_getopt() has three ways to deal with options that follow non-options +\c argv elements. The special argument `--' forces in all cases +the end of option scanning. + - The default is to permute the contents of \c argv while scanning it + so that eventually all the non-options are at the end. This allows + options to be given in any order, even with programs that were not + written to expect this. + - If the options argument string begins with a hyphen (`-'), this is + treated specially. It permits arguments that are not options to be + returned as if they were associated with option character `\\1'. + - POSIX demands the following behavior: The first non-option stops + option processing. This mode is selected by either setting the + environment variable POSIXLY_CORRECT or beginning the options + argument string with a plus sign (`+').
+ +*/ +/*************************************************************************/ +int gk_getopt(int argc, char **argv, char *options) +{ + return gk_getopt_internal(argc, argv, options, NULL, NULL, 0); +} + + +/*************************************************************************/ +/*! \brief Parse command-line arguments with long options + +This function accepts GNU-style long options as well as single-character +options. + +\param argc is the number of command line arguments passed to main(). +\param argv is an array of strings storing the above command line + arguments. +\param options describes the short options to accept, just as it does + in gk_getopt(). +\param long_options describes the long options to accept. See the + definition of ::gk_option for more information. +\param opt_index this is a returned variable. For any long option, + gk_getopt_long() tells you the index in the array \c long_options + of the option's definition, by storing it into *opt_index. + You can get the name of the option with long_options[*opt_index].name. + So you can distinguish among long options either by the values + in their val fields or by their indices. You can also distinguish + in this way among long options that set flags. + + +\return +When gk_getopt_long() encounters a short option, it does the same thing +that gk_getopt() would do: it returns the character code for the option, +and stores the option's argument (if it has one) in #gk_optarg. + +\return +When gk_getopt_long() encounters a long option, it takes actions based +on the flag and val fields of the definition of that option. + +\return +If flag is a null pointer, then gk_getopt_long() returns the contents +of val to indicate which option it found. You should arrange distinct +values in the val field for options with different meanings, so you +can decode these values after gk_getopt_long() returns. If the long +option is equivalent to a short option, you can use the short option's +character code in val.
+ +\return +If flag is not a null pointer, that means this option should just set +a flag in the program. The flag is a variable of type int that you +define. Put the address of the flag in the flag field. Put in the +val field the value you would like this option to store in the flag. +In this case, gk_getopt_long() returns 0. + +\return +When a long option has an argument, gk_getopt_long() puts the argument +value in the variable #gk_optarg before returning. When the option has +no argument, the value in #gk_optarg is a null pointer. This is +how you can tell whether an optional argument was supplied. + +\return +When gk_getopt_long() has no more options to handle, it returns -1, +and leaves in the variable #gk_optind the index in argv of the next +remaining argument. +*/ +/*************************************************************************/ +int gk_getopt_long( int argc, char **argv, char *options, + struct gk_option *long_options, int *opt_index) +{ + return gk_getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + + + +/*************************************************************************/ +/*! \brief Parse command-line arguments with only long options + +Like gk_getopt_long(), but '-' as well as '--' can indicate a long option. +If an option that starts with '-' (not '--') doesn't match a long option, +but does match a short option, it is parsed as a short option instead. +*/ +/*************************************************************************/ +int gk_getopt_long_only(int argc, char **argv, char *options, + struct gk_option *long_options, int *opt_index) +{ + return gk_getopt_internal(argc, argv, options, long_options, opt_index, 1); +} + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_arch.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_arch.h new file mode 100644 index 00000000..b82fb6a6 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_arch.h @@ -0,0 +1,70 @@ +/*! 
+\file gk_arch.h +\brief This file contains various architecture-specific declarations + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_arch.h 21637 2018-01-03 22:37:24Z karypis $ \endverbatim +*/ + +#ifndef _GK_ARCH_H_ +#define _GK_ARCH_H_ + +/************************************************************************* +* Architecture-specific differences in header files +**************************************************************************/ +#ifdef LINUX +#if !defined(__USE_XOPEN) +#define __USE_XOPEN +#endif +#if !defined(_XOPEN_SOURCE) +#define _XOPEN_SOURCE 600 +#endif +#if !defined(__USE_XOPEN2K) +#define __USE_XOPEN2K +#endif +#endif + + +#ifdef HAVE_EXECINFO_H +#include <execinfo.h> +#endif + + +#ifdef __MSC__ + #include "gk_ms_stdint.h" + #include "gk_ms_inttypes.h" + #include "gk_ms_stat.h" + #include "win32/adapt.h" +#else /* !__MSC__: use the system headers that the gk_ms_* files stand in for */ +#ifndef SUNOS + #include <stdint.h> +#endif + #include <inttypes.h> + #include <sys/types.h> +#ifndef __MINGW32__ + #include <sys/resource.h> +#endif + #include <sys/time.h> + #include <unistd.h> +#endif /* __MSC__ */ + + +/************************************************************************* +* Architecture-specific modifications +**************************************************************************/ +#ifdef WIN32 +typedef ptrdiff_t ssize_t; +#endif + + +#ifdef SUNOS +#define PTRDIFF_MAX INT64_MAX +#endif + +/* MSC does not have INFINITY defined */ +#ifndef INFINITY +#define INFINITY FLT_MAX +#endif + +#endif /* _GK_ARCH_H_ */ diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_defs.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_defs.h new file mode 100644 index 00000000..68cb9a4c --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_defs.h @@ -0,0 +1,87 @@ +/*!
+\file gk_defs.h +\brief This file contains various constants definitions + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_defs.h 22039 2018-05-26 16:34:48Z karypis $ \endverbatim +*/ + +#ifndef _GK_DEFS_H_ +#define _GK_DEFS_H_ + + +#define LTERM (void **) 0 /* List terminator for GKfree() */ + +/* mopt_t types */ +#define GK_MOPT_MARK 1 +#define GK_MOPT_CORE 2 +#define GK_MOPT_HEAP 3 + +#define HTABLE_EMPTY -1 +#define HTABLE_DELETED -2 +#define HTABLE_FIRST 1 +#define HTABLE_NEXT 2 + +/* pdb corruption bit switches */ +#define CRP_ALTLOCS 1 +#define CRP_MISSINGCA 2 +#define CRP_MISSINGBB 4 +#define CRP_MULTICHAIN 8 +#define CRP_MULTICA 16 +#define CRP_MULTIBB 32 + +#define MAXLINELEN 300000 + +/* GKlib signals to standard signal mapping */ +#define SIGMEM SIGABRT +#define SIGERR SIGTERM + + +/* CSR-related defines */ +#define GK_CSR_ROW 1 +#define GK_CSR_COL 2 +#define GK_CSR_ROWCOL 3 + +#define GK_CSR_MAXTF 1 +#define GK_CSR_SQRT 2 +#define GK_CSR_POW25 3 +#define GK_CSR_POW65 4 +#define GK_CSR_POW75 5 +#define GK_CSR_POW85 6 +#define GK_CSR_LOG 7 +#define GK_CSR_IDF 8 +#define GK_CSR_IDF2 9 +#define GK_CSR_MAXTF2 10 + +#define GK_CSR_DOTP 1 +#define GK_CSR_COS 2 +#define GK_CSR_JAC 3 +#define GK_CSR_MIN 4 +#define GK_CSR_AMIN 5 + +#define GK_CSR_FMT_AUTO 2 +#define GK_CSR_FMT_CLUTO 1 +#define GK_CSR_FMT_CSR 2 +#define GK_CSR_FMT_METIS 3 +#define GK_CSR_FMT_BINROW 4 +#define GK_CSR_FMT_BINCOL 5 +#define GK_CSR_FMT_IJV 6 +#define GK_CSR_FMT_BIJV 7 + +#define GK_CSR_SYM_SUM 1 +#define GK_CSR_SYM_MIN 2 +#define GK_CSR_SYM_MAX 3 +#define GK_CSR_SYM_AVG 4 + + +#define GK_GRAPH_FMT_METIS 1 +#define GK_GRAPH_FMT_IJV 2 +#define GK_GRAPH_FMT_HIJV 3 + +#define GK_GRAPH_SYM_SUM 1 +#define GK_GRAPH_SYM_MIN 2 +#define GK_GRAPH_SYM_MAX 3 +#define GK_GRAPH_SYM_AVG 4 + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_externs.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_externs.h new file mode 100644 index 
00000000..2c0fdd96 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_externs.h @@ -0,0 +1,25 @@ +/*! +\file gk_externs.h +\brief This file contains definitions of external variables created by GKlib + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_externs.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#ifndef _GK_EXTERNS_H_ +#define _GK_EXTERNS_H_ + + +/************************************************************************* +* Extern variable definition. Hopefully, the __thread makes them thread-safe. +**************************************************************************/ +#ifndef _GK_ERROR_C_ +/* declared in error.c */ +extern __thread int gk_cur_jbufs; +extern __thread jmp_buf gk_jbufs[]; +extern __thread jmp_buf gk_jbuf; + +#endif + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_getopt.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_getopt.h new file mode 100644 index 00000000..597c0802 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_getopt.h @@ -0,0 +1,64 @@ +/*! +\file gk_getopt.h +\brief This file contains GNU's externs/structs/prototypes + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_getopt.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#ifndef _GK_GETOPT_H_ +#define _GK_GETOPT_H_ + + +/* Externals from getopt.c */ +extern char *gk_optarg; +extern int gk_optind; +extern int gk_opterr; +extern int gk_optopt; + + +/*! \brief The structure that stores the information about the command-line options + +This structure describes a single long option name for the sake of +gk_getopt_long(). The argument long_options must be an array +of these structures, one for each long option. Terminate the array with +an element containing all zeros. +*/ +struct gk_option { + char *name; /*!< This field is the name of the option. */ + int has_arg; /*!< This field says whether the option takes an argument. 
+ It is an integer, and there are three legitimate values: + no_argument, required_argument and optional_argument. + */ + int *flag; /*!< See the discussion on ::gk_option#val */ + int val; /*!< These fields control how to report or act on the option + when it occurs. + + If flag is a null pointer, then the val is a value which + identifies this option. Often these values are chosen + to uniquely identify particular long options. + + If flag is not a null pointer, it should be the address + of an int variable which is the flag for this option. + The value in val is the value to store in the flag to + indicate that the option was seen. */ +}; + +/* Names for the values of the `has_arg' field of `struct gk_option'. */ +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + + +/* Function prototypes */ +extern int gk_getopt(int argc, char **argv, char *shortopts); +extern int gk_getopt_long(int argc, char **argv, char *shortopts, + struct gk_option *longopts, int *longind); +extern int gk_getopt_long_only (int argc, char **argv, + char *shortopts, struct gk_option *longopts, int *longind); + + + +#endif + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_macros.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_macros.h new file mode 100644 index 00000000..c3f1b453 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_macros.h @@ -0,0 +1,169 @@ +/*! +\file gk_macros.h +\brief This file contains various macros + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_macros.h 15048 2013-08-31 19:38:14Z karypis $ \endverbatim +*/ + +#ifndef _GK_MACROS_H_ +#define _GK_MACROS_H_ + +/*------------------------------------------------------------- + * Usefull commands + *-------------------------------------------------------------*/ +#define gk_max(a, b) ((a) >= (b) ? (a) : (b)) +#define gk_min(a, b) ((a) >= (b) ? (b) : (a)) +#define gk_max3(a, b, c) ((a) >= (b) && (a) >= (c) ? 
(a) : ((b) >= (a) && (b) >= (c) ? (b) : (c))) +#define gk_SWAP(a, b, tmp) do {(tmp) = (a); (a) = (b); (b) = (tmp);} while(0) +#define INC_DEC(a, b, val) do {(a) += (val); (b) -= (val);} while(0) +#define sign(a, b) (((a) >= 0 ? (b) : -(b))) /* b if a >= 0, otherwise -b; arguments are parenthesized so expression arguments expand correctly */ + +#define ONEOVERRANDMAX (1.0/(RAND_MAX+1.0)) +#define RandomInRange(u) ((int) (ONEOVERRANDMAX*(u)*rand())) +#define RandomInRange_r(s, u) ((int) (ONEOVERRANDMAX*(u)*rand_r(s))) + +#define gk_abs(x) ((x) >= 0 ? (x) : -(x)) + + +/*------------------------------------------------------------- + * Timing macros: start subtracts the current time and stop adds it, so tmr accumulates elapsed seconds + *-------------------------------------------------------------*/ +#define gk_clearcputimer(tmr) ((tmr) = 0.0) +#define gk_startcputimer(tmr) ((tmr) -= gk_CPUSeconds()) +#define gk_stopcputimer(tmr) ((tmr) += gk_CPUSeconds()) +#define gk_getcputimer(tmr) (tmr) + +#define gk_clearwctimer(tmr) ((tmr) = 0.0) +#define gk_startwctimer(tmr) ((tmr) -= gk_WClockSeconds()) +#define gk_stopwctimer(tmr) ((tmr) += gk_WClockSeconds()) +#define gk_getwctimer(tmr) (tmr) + +/*------------------------------------------------------------- + * dbglvl handling macros + *-------------------------------------------------------------*/ +#define IFSET(a, flag, cmd) if ((a)&(flag)) (cmd); + + +/*------------------------------------------------------------- + * graceful library exit macro + *-------------------------------------------------------------*/ +#define GKSETJMP() (setjmp(gk_return_to_entry)) +#define gk_sigcatch() (setjmp(gk_jbufs[gk_cur_jbufs])) + + +/*------------------------------------------------------------- + * Debugging memory leaks + *-------------------------------------------------------------*/ +#ifdef DMALLOC +# define MALLOC_CHECK(ptr) \ + if (malloc_verify((ptr)) == DMALLOC_VERIFY_ERROR) { \ + printf("***MALLOC_CHECK failed on line %d of file %s: " #ptr "\n", \ + __LINE__, __FILE__); \ + abort(); \ + } +#else +# define MALLOC_CHECK(ptr) ; +#endif + + +/*------------------------------------------------------------- + * CSR conversion macros
+ *-------------------------------------------------------------*/ +#define MAKECSR(i, n, a) /* in place: per-row counts in a[0..n-1] become start offsets in a[0..n] */ \ + do { \ + for (i=1; i < n; i++) a[i] += a[i-1]; \ + for (i=n; i>0; i--) a[i] = a[i-1]; \ + a[0] = 0; \ + } while(0) + +#define SHIFTCSR(i, n, a) /* shift a[0..n-1] up by one slot and set a[0] to 0 */ \ + do { \ + for (i=n; i>0; i--) a[i] = a[i-1]; \ + a[0] = 0; \ + } while(0) + + +/*------------------------------------------------------------- + * ASSERTS that cannot be turned off! + *-------------------------------------------------------------*/ +#define GKASSERT(expr) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + abort(); \ + } + +#define GKASSERTP(expr,msg) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + printf msg ; \ + printf("\n"); \ + abort(); \ + } + +#define GKCUASSERT(expr) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + } + +#define GKWARN(expr) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + } + +#define GKCUASSERTP(expr,msg) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + printf msg ; \ + printf("\n"); \ + } + +#define GKWARNP(expr,msg) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + printf msg ; \ + printf("\n"); \ + } + + +/*------------------------------------------------------------- + * Program Assertions + *-------------------------------------------------------------*/ +#ifndef NDEBUG +# define ASSERT(expr) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + assert(expr); \ + } + +# define ASSERTP(expr,msg) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + printf msg ; \ + printf("\n"); \ + assert(expr); \ +
} +#else +# define ASSERT(expr) ; +# define ASSERTP(expr,msg) ; +#endif + +#ifndef NDEBUG2 +# define ASSERT2 ASSERT +# define ASSERTP2 ASSERTP +#else +# define ASSERT2(expr) ; +# define ASSERTP2(expr,msg) ; +#endif + + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkblas.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkblas.h new file mode 100644 index 00000000..1231669d --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkblas.h @@ -0,0 +1,203 @@ +/*! +\file gk_mkblas.h +\brief Templates for BLAS-like routines + +\date Started 3/28/07 +\author George +\version\verbatim $Id: gk_mkblas.h 16304 2014-02-25 14:27:19Z karypis $ \endverbatim +*/ + +#ifndef _GK_MKBLAS_H_ +#define _GK_MKBLAS_H_ + + +#define GK_MKBLAS(PRFX, TYPE, OUTTYPE) \ +/*************************************************************************/\ +/*! The macro for gk_?incset()-class of routines */\ +/*************************************************************************/\ +TYPE *PRFX ## incset(size_t n, TYPE baseval, TYPE *x)\ +{\ + size_t i;\ +\ + for (i=0; i max ? (*x) : max);\ +\ + return max;\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?min()-class of routines */\ +/*************************************************************************/\ +TYPE PRFX ## min(size_t n, TYPE *x, size_t incx)\ +{\ + size_t i;\ + TYPE min;\ +\ + if (n <= 0) return (TYPE) 0;\ +\ + for (min=(*x), x+=incx, i=1; i x[max] ? j : max);\ +\ + return (size_t)(max/incx);\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?argmin()-class of routines */\ +/*************************************************************************/\ +size_t PRFX ## argmin(size_t n, TYPE *x, size_t incx)\ +{\ + size_t i, j, min=0;\ +\ + for (i=1, j=incx; i 0 ? 
(OUTTYPE)sqrt((double)partial) : (OUTTYPE)0);\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?dot()-class of routines */\ +/**************************************************************************/\ +OUTTYPE PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy)\ +{\ + size_t i;\ + OUTTYPE partial = 0.0;\ + \ + for (i=0; innodes = 0;\ + queue->maxnodes = maxnodes;\ +\ + queue->heap = KVMALLOC(maxnodes, "gk_PQInit: heap");\ + queue->locator = gk_idxsmalloc(maxnodes, -1, "gk_PQInit: locator");\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function resets the priority queue */\ +/**************************************************************************/\ +void FPRFX ## Reset(PQT *queue)\ +{\ + ssize_t i;\ + ssize_t *locator=queue->locator;\ + KVT *heap=queue->heap;\ +\ + for (i=queue->nnodes-1; i>=0; i--)\ + locator[heap[i].val] = -1;\ + queue->nnodes = 0;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function frees the internal datastructures of the priority queue */\ +/**************************************************************************/\ +void FPRFX ## Free(PQT *queue)\ +{\ + if (queue == NULL) return;\ + gk_free((void **)&queue->heap, &queue->locator, LTERM);\ + queue->maxnodes = 0;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function frees the internal datastructures of the priority queue \ + and the queue itself */\ +/**************************************************************************/\ +void FPRFX ## Destroy(PQT *queue)\ +{\ + if (queue == NULL) return;\ + FPRFX ## Free(queue);\ + gk_free((void **)&queue, LTERM);\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function returns the length of the queue */\ +/**************************************************************************/\ +size_t FPRFX ## Length(PQT *queue)\ +{\ + return queue->nnodes;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function adds an item in the priority queue */\ +/**************************************************************************/\ +int FPRFX ## Insert(PQT *queue, VT node, KT key)\ +{\ + ssize_t i, j;\ + ssize_t *locator=queue->locator;\ + KVT *heap=queue->heap;\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + ASSERT(locator[node] == -1);\ +\ + i = queue->nnodes++;\ + while (i > 0) {\ + j = (i-1)>>1;\ + if (KEY_LT(key, heap[j].key)) {\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ + ASSERT(i >= 0);\ + heap[i].key = key;\ + heap[i].val = node;\ + locator[node] = i;\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + return 0;\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function deletes an item from the priority queue */\ +/**************************************************************************/\ +int FPRFX ## Delete(PQT *queue, VT node)\ +{\ + ssize_t i, j;\ + size_t nnodes;\ + KT newkey, oldkey;\ + ssize_t *locator=queue->locator;\ + KVT *heap=queue->heap;\ +\ + ASSERT(locator[node] != -1);\ + ASSERT(heap[locator[node]].val == node);\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + i = locator[node];\ + locator[node] = -1;\ +\ + if (--queue->nnodes > 0 && heap[queue->nnodes].val != node) {\ + node = heap[queue->nnodes].val;\ + newkey = heap[queue->nnodes].key;\ + oldkey = heap[i].key;\ +\ + if (KEY_LT(newkey, oldkey)) { /* Filter-up */\ + while (i > 0) {\ + j = (i-1)>>1;\ + if (KEY_LT(newkey, heap[j].key)) {\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ + }\ + else { /* Filter down */\ + nnodes = queue->nnodes;\ + while ((j=(i<<1)+1) < nnodes) {\ + if (KEY_LT(heap[j].key, newkey)) {\ + if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\ + j++;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\ + j++;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ + }\ +\ + heap[i].key = newkey;\ + heap[i].val = node;\ + locator[node] = i;\ + }\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + return 0;\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function updates the key values associated for a particular item */ \ +/**************************************************************************/\ +void FPRFX ## Update(PQT *queue, VT node, KT newkey)\ +{\ + ssize_t i, j;\ + size_t nnodes;\ + KT oldkey;\ + ssize_t *locator=queue->locator;\ + KVT *heap=queue->heap;\ +\ + oldkey = heap[locator[node]].key;\ + if (!KEY_LT(newkey, oldkey) && !KEY_LT(oldkey, newkey)) return;\ +\ + ASSERT(locator[node] != -1);\ + ASSERT(heap[locator[node]].val == node);\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + i = locator[node];\ +\ + if (KEY_LT(newkey, oldkey)) { /* Filter-up */\ + while (i > 0) {\ + j = (i-1)>>1;\ + if (KEY_LT(newkey, heap[j].key)) {\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ + }\ + else { /* Filter down */\ + nnodes = queue->nnodes;\ + while ((j=(i<<1)+1) < nnodes) {\ + if (KEY_LT(heap[j].key, newkey)) {\ + if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\ + j++;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\ + j++;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ + }\ +\ + heap[i].key = newkey;\ + heap[i].val = node;\ + locator[node] = i;\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + return;\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function returns the item at the top of the queue and removes\ + it from the priority queue */\ +/**************************************************************************/\ +VT FPRFX ## GetTop(PQT *queue)\ +{\ + ssize_t i, j;\ + ssize_t *locator;\ + KVT *heap;\ + VT vtx, node;\ + KT key;\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + if (queue->nnodes == 0)\ + return -1;\ +\ + queue->nnodes--;\ +\ + heap = queue->heap;\ + locator = queue->locator;\ +\ + vtx = heap[0].val;\ + locator[vtx] = -1;\ +\ + if ((i = queue->nnodes) > 0) {\ + key = heap[i].key;\ + node = heap[i].val;\ + i = 0;\ + while ((j=2*i+1) < queue->nnodes) {\ + if (KEY_LT(heap[j].key, key)) {\ + if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, heap[j].key))\ + j = j+1;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, key)) {\ + j = j+1;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ +\ + heap[i].key = key;\ + heap[i].val = node;\ + locator[node] = i;\ + }\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ + return vtx;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function returns the item at the top of the queue. The item is not\ + deleted from the queue. */\ +/**************************************************************************/\ +VT FPRFX ## SeeTopVal(PQT *queue)\ +{\ + return (queue->nnodes == 0 ? -1 : queue->heap[0].val);\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function returns the key of the top item. The item is not\ + deleted from the queue. */\ +/**************************************************************************/\ +KT FPRFX ## SeeTopKey(PQT *queue)\ +{\ + return (queue->nnodes == 0 ? KMAX : queue->heap[0].key);\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function returns the key of a specific item; locator[node] is used as the heap index without a membership check */\ +/**************************************************************************/\ +KT FPRFX ## SeeKey(PQT *queue, VT node)\ +{\ + ssize_t *locator;\ + KVT *heap;\ +\ + heap = queue->heap;\ + locator = queue->locator;\ +\ + return heap[locator[node]].key;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function returns the first item in a breadth-first traversal of\ + the heap whose key is less than maxwgt. This function is here due to\ + hMETIS and is not general!*/\ +/**************************************************************************/\ +/*\ +VT FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts)\ +{\ + ssize_t i;\ +\ + if (queue->nnodes == 0)\ + return -1;\ +\ + if (maxwgt <= 1000)\ + return FPRFX ## SeeTopVal(queue);\ +\ + for (i=0; i < queue->nnodes; i++) {\ + if (queue->heap[i].key > 0) {\ + if (wgts[queue->heap[i].val] <= maxwgt)\ + return queue->heap[i].val;\ + }\ + else {\ + if (queue->heap[i/2].key <= 0)\ + break;\ + }\ + }\ +\ + return queue->heap[0].val;\ +\ +}\ +*/\ +\ +\ +/*************************************************************************/\ +/*!
This functions checks the consistency of the heap */\ +/**************************************************************************/\ +int FPRFX ## CheckHeap(PQT *queue)\ +{\ + ssize_t i, j;\ + size_t nnodes;\ + ssize_t *locator;\ + KVT *heap;\ +\ + heap = queue->heap;\ + locator = queue->locator;\ + nnodes = queue->nnodes;\ +\ + if (nnodes == 0)\ + return 1;\ +\ + ASSERT(locator[heap[0].val] == 0);\ + for (i=1; imaxnodes; i++) {\ + if (locator[i] != -1)\ + j++;\ + }\ + ASSERTP(j == nnodes, ("%jd %jd\n", (intmax_t)j, (intmax_t)nnodes));\ +\ + return 1;\ +}\ + + +#define GK_MKPQUEUE_PROTO(FPRFX, PQT, KT, VT)\ + PQT * FPRFX ## Create(size_t maxnodes);\ + void FPRFX ## Init(PQT *queue, size_t maxnodes);\ + void FPRFX ## Reset(PQT *queue);\ + void FPRFX ## Free(PQT *queue);\ + void FPRFX ## Destroy(PQT *queue);\ + size_t FPRFX ## Length(PQT *queue);\ + int FPRFX ## Insert(PQT *queue, VT node, KT key);\ + int FPRFX ## Delete(PQT *queue, VT node);\ + void FPRFX ## Update(PQT *queue, VT node, KT newkey);\ + VT FPRFX ## GetTop(PQT *queue);\ + VT FPRFX ## SeeTopVal(PQT *queue);\ + KT FPRFX ## SeeTopKey(PQT *queue);\ + KT FPRFX ## SeeKey(PQT *queue, VT node);\ + VT FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts);\ + int FPRFX ## CheckHeap(PQT *queue);\ + + +/* This is how these macros are used +GK_MKPQUEUE(gk_dkvPQ, gk_dkvPQ_t, double, gk_idx_t, gk_dkvmalloc, DBL_MAX) +GK_MKPQUEUE_PROTO(gk_dkvPQ, gk_dkvPQ_t, double, gk_idx_t) +*/ + + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkpqueue2.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkpqueue2.h new file mode 100644 index 00000000..10e8ee46 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkpqueue2.h @@ -0,0 +1,215 @@ +/*! +\file gk_mkpqueue2.h +\brief Templates for priority queues that do not utilize locators and as such + they can use different types of values. 
+
+\date Started 4/09/07
+\author George
+\version\verbatim $Id: gk_mkpqueue2.h 13005 2012-10-23 22:34:36Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKPQUEUE2_H
+#define _GK_MKPQUEUE2_H
+
+
+#define GK_MKPQUEUE2(FPRFX, PQT, KT, VT, KMALLOC, VMALLOC, KMAX, KEY_LT)\
+/*************************************************************************/\
+/*! This function creates and initializes a priority queue.\
+    \param maxnodes is the capacity requested for the keys and vals arrays.\
+    \returns the new, empty queue. If gk_malloc() fails for the queue\
+             itself, NULL is returned. If either array allocation fails,\
+             everything is handed to gk_free(); presumably gk_free() also\
+             resets queue to NULL so that NULL is returned in that case\
+             too (TODO confirm against gk_free() in memory.c). */\
+/**************************************************************************/\
+PQT *FPRFX ## Create2(ssize_t maxnodes)\
+{\
+  PQT *queue; \
+\
+  if ((queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate2: queue")) != NULL) {\
+    memset(queue, 0, sizeof(PQT));\
+    queue->nnodes   = 0;\
+    queue->maxnodes = maxnodes;\
+    queue->keys     = KMALLOC(maxnodes, "gk_pqCreate2: keys");\
+    queue->vals     = VMALLOC(maxnodes, "gk_pqCreate2: vals");\
+\
+    if (queue->keys == NULL || queue->vals == NULL)\
+      gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
+  }\
+\
+  return queue;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function resets the priority queue: the item count is set to 0,\
+    the keys and vals arrays are left allocated and untouched. */\
+/**************************************************************************/\
+void FPRFX ## Reset2(PQT *queue)\
+{\
+  queue->nnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function frees the internal datastructures of the priority queue\
+    and the queue structure itself.\
+    \param r_queue points to the caller's queue pointer; it is set to NULL\
+           on return. Nothing is done if *r_queue is already NULL. */\
+/**************************************************************************/\
+void FPRFX ## Destroy2(PQT **r_queue)\
+{\
+  PQT *queue = *r_queue; \
+  if (queue == NULL) return;\
+  gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
+  *r_queue = NULL;\
+}\
+\
+\
+/*************************************************************************/\
+/*! 
This function returns the length of the queue, i.e. the number of items\
+    currently stored (queue->nnodes). */\
+/**************************************************************************/\
+size_t FPRFX ## Length2(PQT *queue)\
+{\
+  return queue->nnodes;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function adds an item in the priority queue.\
+    \param queue is the priority queue to insert into.\
+    \param val is the value to store.\
+    \param key is the key that orders val within the heap.\
+    \returns 1 if the item was inserted, 0 if the queue already holds\
+             maxnodes items (the queue is left unchanged in that case). */\
+/**************************************************************************/\
+int FPRFX ## Insert2(PQT *queue, VT val, KT key)\
+{\
+  ssize_t i, j;\
+  KT *keys=queue->keys;\
+  VT *vals=queue->vals;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  if (queue->nnodes == queue->maxnodes) \
+    return 0;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue)); /* NOTE(review): repeats the check made above */\
+\
+  i = queue->nnodes++; /* the new item starts in the first free slot */\
+  while (i > 0) {\
+    j = (i-1)>>1; /* parent of slot i */\
+    if (KEY_LT(key, keys[j])) { /* the parent must move down to make room */\
+      keys[i] = keys[j];\
+      vals[i] = vals[j];\
+      i = j;\
+    }\
+    else\
+      break;\
+  }\
+  ASSERT(i >= 0);\
+  keys[i] = key;\
+  vals[i] = val;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! 
This function returns the item at the top of the queue and removes\
+    it from the priority queue.\
+    \param queue is the priority queue to pop from.\
+    \param r_val receives the value that was stored at the root; it is not\
+           written when the queue is empty.\
+    \returns 1 if an item was removed, 0 if the queue was empty. */\
+/**************************************************************************/\
+int FPRFX ## GetTop2(PQT *queue, VT *r_val)\
+{\
+  ssize_t i, j;\
+  KT key, *keys=queue->keys;\
+  VT val, *vals=queue->vals;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  if (queue->nnodes == 0)\
+    return 0;\
+\
+  queue->nnodes--;\
+\
+  *r_val = vals[0];\
+\
+  if ((i = queue->nnodes) > 0) { /* re-seat the former last element, starting at the root */\
+    key = keys[i];\
+    val = vals[i];\
+    i = 0;\
+    while ((j=2*i+1) < queue->nnodes) { /* j is the left child of the hole at i */\
+      if (KEY_LT(keys[j], key)) {\
+        if (j+1 < queue->nnodes && KEY_LT(keys[j+1], keys[j]))\
+          j = j+1; /* the right child precedes the left one, so promote it instead */\
+        keys[i] = keys[j];\
+        vals[i] = vals[j];\
+        i = j;\
+      }\
+      else if (j+1 < queue->nnodes && KEY_LT(keys[j+1], key)) {\
+        j = j+1;\
+        keys[i] = keys[j];\
+        vals[i] = vals[j];\
+        i = j;\
+      }\
+      else\
+        break; /* neither child precedes key: the hole at i is its final slot */\
+    }\
+\
+    keys[i] = key;\
+    vals[i] = val;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the item at the top of the queue. The item is not\
+    deleted from the queue.\
+    \param r_val receives queue->vals[0]; it is not written when the queue\
+           is empty.\
+    \returns 1 if a value was stored in *r_val, 0 if the queue is empty. */\
+/**************************************************************************/\
+int FPRFX ## SeeTopVal2(PQT *queue, VT *r_val)\
+{\
+  if (queue->nnodes == 0) \
+    return 0;\
+\
+  *r_val = queue->vals[0];\
+\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the key of the top item. The item is not\
+    deleted from the queue.\
+    \returns queue->keys[0], or KMAX if the queue is empty. */\
+/**************************************************************************/\
+KT FPRFX ## SeeTopKey2(PQT *queue)\
+{\
+  return (queue->nnodes == 0 ? KMAX : queue->keys[0]);\
+}\
+\
+\
+/*************************************************************************/\
+/*! 
This functions checks the consistency of the heap */\ +/**************************************************************************/\ +int FPRFX ## CheckHeap2(PQT *queue)\ +{\ + ssize_t i;\ + KT *keys=queue->keys;\ +\ + if (queue->nnodes == 0)\ + return 1;\ +\ + for (i=1; innodes; i++) {\ + ASSERT(!KEY_LT(keys[i], keys[(i-1)/2]));\ + }\ + for (i=1; innodes; i++)\ + ASSERT(!KEY_LT(keys[i], keys[0]));\ +\ + return 1;\ +}\ + + +#define GK_MKPQUEUE2_PROTO(FPRFX, PQT, KT, VT)\ + PQT * FPRFX ## Create2(ssize_t maxnodes);\ + void FPRFX ## Reset2(PQT *queue);\ + void FPRFX ## Destroy2(PQT **r_queue);\ + size_t FPRFX ## Length2(PQT *queue);\ + int FPRFX ## Insert2(PQT *queue, VT node, KT key);\ + int FPRFX ## GetTop2(PQT *queue, VT *r_val);\ + int FPRFX ## SeeTopVal2(PQT *queue, VT *r_val);\ + KT FPRFX ## SeeTopKey2(PQT *queue);\ + int FPRFX ## CheckHeap2(PQT *queue);\ + + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkrandom.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkrandom.h new file mode 100644 index 00000000..68d54fa3 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkrandom.h @@ -0,0 +1,123 @@ +/*! +\file +\brief Templates for portable random number generation + +\date Started 5/17/07 +\author George +\version\verbatim $Id: gk_mkrandom.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + + +#ifndef _GK_MKRANDOM_H +#define _GK_MKRANDOM_H + +/*************************************************************************/\ +/*! The generator for the rand() related routines. 
\ + \params RNGT the datatype that defines the range of values over which\ + random numbers will be generated\ + \params VALT the datatype that defines the contents of the array to \ + be permuted by randArrayPermute() \ + \params FPRFX the function prefix \ +*/\ +/**************************************************************************/\ +#define GK_MKRANDOM(FPRFX, RNGT, VALT)\ +/*************************************************************************/\ +/*! Initializes the generator */ \ +/**************************************************************************/\ +void FPRFX ## srand(RNGT seed) \ +{\ + gk_randinit((uint64_t) seed);\ +}\ +\ +\ +/*************************************************************************/\ +/*! Returns a random number */ \ +/**************************************************************************/\ +RNGT FPRFX ## rand() \ +{\ + if (sizeof(RNGT) <= sizeof(int32_t)) \ + return (RNGT)gk_randint32(); \ + else \ + return (RNGT)gk_randint64(); \ +}\ +\ +\ +/*************************************************************************/\ +/*! Returns a random number between [0, max) */ \ +/**************************************************************************/\ +RNGT FPRFX ## randInRange(RNGT max) \ +{\ + return (RNGT)((FPRFX ## rand())%max); \ +}\ +\ +\ +/*************************************************************************/\ +/*! Randomly permutes the elements of an array p[]. \ + flag == 1, p[i] = i prior to permutation, \ + flag == 0, p[] is not initialized. */\ +/**************************************************************************/\ +void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag)\ +{\ + RNGT i, u, v;\ + VALT tmp;\ +\ + if (flag == 1) {\ + for (i=0; ikey < (b)->key) + * GKQSORT(struct elt, arr, n, elt_lt); + * } + * + * And so on. + */ + +/* Swap two items pointed to by A and B using temporary buffer t. 
*/ +#define _GKQSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t))) + +/* Discontinue quicksort algorithm when partition gets below this size. */ +#define _GKQSORT_MAX_THRESH 8 + +/* The next 4 #defines implement a very fast in-line stack abstraction. */ +#define _GKQSORT_STACK_SIZE (8 * sizeof(size_t)) +#define _GKQSORT_PUSH(top, low, high) (((top->_lo = (low)), (top->_hi = (high)), ++top)) +#define _GKQSORT_POP(low, high, top) ((--top, (low = top->_lo), (high = top->_hi))) +#define _GKQSORT_STACK_NOT_EMPTY (_stack < _top) + + +/* The main code starts here... */ +#define GK_MKQSORT(GKQSORT_TYPE,GKQSORT_BASE,GKQSORT_NELT,GKQSORT_LT) \ +{ \ + GKQSORT_TYPE *const _base = (GKQSORT_BASE); \ + const size_t _elems = (GKQSORT_NELT); \ + GKQSORT_TYPE _hold; \ + \ + if (_elems < 1) \ + return; \ + \ + /* Don't declare two variables of type GKQSORT_TYPE in a single \ + * statement: eg `TYPE a, b;', in case if TYPE is a pointer, \ + * expands to `type* a, b;' wich isn't what we want. \ + */ \ + \ + if (_elems > _GKQSORT_MAX_THRESH) { \ + GKQSORT_TYPE *_lo = _base; \ + GKQSORT_TYPE *_hi = _lo + _elems - 1; \ + struct { \ + GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo; \ + } _stack[_GKQSORT_STACK_SIZE], *_top = _stack + 1; \ + \ + while (_GKQSORT_STACK_NOT_EMPTY) { \ + GKQSORT_TYPE *_left_ptr; GKQSORT_TYPE *_right_ptr; \ + \ + /* Select median value from among LO, MID, and HI. Rearrange \ + LO and HI so the three values are sorted. This lowers the \ + probability of picking a pathological pivot value and \ + skips a comparison for both the LEFT_PTR and RIGHT_PTR in \ + the while loops. 
*/ \ + \ + GKQSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1); \ + \ + if (GKQSORT_LT (_mid, _lo)) \ + _GKQSORT_SWAP (_mid, _lo, _hold); \ + if (GKQSORT_LT (_hi, _mid)) \ + _GKQSORT_SWAP (_mid, _hi, _hold); \ + else \ + goto _jump_over; \ + if (GKQSORT_LT (_mid, _lo)) \ + _GKQSORT_SWAP (_mid, _lo, _hold); \ + _jump_over:; \ + \ + _left_ptr = _lo + 1; \ + _right_ptr = _hi - 1; \ + \ + /* Here's the famous ``collapse the walls'' section of quicksort. \ + Gotta like those tight inner loops! They are the main reason \ + that this algorithm runs much faster than others. */ \ + do { \ + while (GKQSORT_LT (_left_ptr, _mid)) \ + ++_left_ptr; \ + \ + while (GKQSORT_LT (_mid, _right_ptr)) \ + --_right_ptr; \ + \ + if (_left_ptr < _right_ptr) { \ + _GKQSORT_SWAP (_left_ptr, _right_ptr, _hold); \ + if (_mid == _left_ptr) \ + _mid = _right_ptr; \ + else if (_mid == _right_ptr) \ + _mid = _left_ptr; \ + ++_left_ptr; \ + --_right_ptr; \ + } \ + else if (_left_ptr == _right_ptr) { \ + ++_left_ptr; \ + --_right_ptr; \ + break; \ + } \ + } while (_left_ptr <= _right_ptr); \ + \ + /* Set up pointers for next iteration. First determine whether \ + left and right partitions are below the threshold size. If so, \ + ignore one or both. Otherwise, push the larger partition's \ + bounds on the stack and continue sorting the smaller one. */ \ + \ + if (_right_ptr - _lo <= _GKQSORT_MAX_THRESH) { \ + if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH) \ + /* Ignore both small partitions. */ \ + _GKQSORT_POP (_lo, _hi, _top); \ + else \ + /* Ignore small left partition. */ \ + _lo = _left_ptr; \ + } \ + else if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH) \ + /* Ignore small right partition. */ \ + _hi = _right_ptr; \ + else if (_right_ptr - _lo > _hi - _left_ptr) { \ + /* Push larger left partition indices. */ \ + _GKQSORT_PUSH (_top, _lo, _right_ptr); \ + _lo = _left_ptr; \ + } \ + else { \ + /* Push larger right partition indices. 
*/ \ + _GKQSORT_PUSH (_top, _left_ptr, _hi); \ + _hi = _right_ptr; \ + } \ + } \ + } \ + \ + /* Once the BASE array is partially sorted by quicksort the rest \ + is completely sorted using insertion sort, since this is efficient \ + for partitions below MAX_THRESH size. BASE points to the \ + beginning of the array to sort, and END_PTR points at the very \ + last element in the array (*not* one beyond it!). */ \ + \ + { \ + GKQSORT_TYPE *const _end_ptr = _base + _elems - 1; \ + GKQSORT_TYPE *_tmp_ptr = _base; \ + register GKQSORT_TYPE *_run_ptr; \ + GKQSORT_TYPE *_thresh; \ + \ + _thresh = _base + _GKQSORT_MAX_THRESH; \ + if (_thresh > _end_ptr) \ + _thresh = _end_ptr; \ + \ + /* Find smallest element in first threshold and place it at the \ + array's beginning. This is the smallest array element, \ + and the operation speeds up insertion sort's inner loop. */ \ + \ + for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr) \ + if (GKQSORT_LT (_run_ptr, _tmp_ptr)) \ + _tmp_ptr = _run_ptr; \ + \ + if (_tmp_ptr != _base) \ + _GKQSORT_SWAP (_tmp_ptr, _base, _hold); \ + \ + /* Insertion sort, running from left-hand-side \ + * up to right-hand-side. */ \ + \ + _run_ptr = _base + 1; \ + while (++_run_ptr <= _end_ptr) { \ + _tmp_ptr = _run_ptr - 1; \ + while (GKQSORT_LT (_run_ptr, _tmp_ptr)) \ + --_tmp_ptr; \ + \ + ++_tmp_ptr; \ + if (_tmp_ptr != _run_ptr) { \ + GKQSORT_TYPE *_trav = _run_ptr + 1; \ + while (--_trav >= _run_ptr) { \ + GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo; \ + _hold = *_trav; \ + \ + for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo) \ + *_hi = *_lo; \ + *_hi = _hold; \ + } \ + } \ + } \ + } \ + \ +} + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkutils.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkutils.h new file mode 100644 index 00000000..a092f222 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_mkutils.h @@ -0,0 +1,40 @@ +/*! 
+\file +\brief Templates for various utility routines + +\date Started 5/28/07 +\author George +\version\verbatim $Id: gk_mkutils.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#ifndef _GK_MKUTILS_H_ +#define _GK_MKUTILS_H_ + + +#define GK_MKARRAY2CSR(PRFX, TYPE)\ +/*************************************************************************/\ +/*! The macro for gk_?array2csr() routine */\ +/**************************************************************************/\ +void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind)\ +{\ + TYPE i;\ +\ + for (i=0; i<=range; i++)\ + ptr[i] = 0;\ +\ + for (i=0; i 1000 +#pragma once +#endif + +#include "gk_ms_stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define 
PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 
"o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + 
++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_ms_stat.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_ms_stat.h new file mode 100644 index 00000000..a1ef6faf --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_ms_stat.h @@ -0,0 +1,22 @@ +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MS_STAT_H_ +#define _MS_STAT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include +/* Test macros for file types. */ + +#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask)) + +#define S_ISDIR(mode) __S_ISTYPE((mode), S_IFDIR) +#define S_ISCHR(mode) __S_ISTYPE((mode), S_IFCHR) +#define S_ISBLK(mode) __S_ISTYPE((mode), S_IFBLK) +#define S_ISREG(mode) __S_ISTYPE((mode), S_IFREG) + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_ms_stdint.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_ms_stdint.h new file mode 100644 index 00000000..7e200dc6 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_ms_stdint.h @@ -0,0 +1,222 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" 
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
+// or the compiler gives many errors like this:
+// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+   extern "C++" {
+#endif
+#     include <wchar.h>
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+   }
+#endif
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+typedef __int8 int8_t;
+typedef __int16 int16_t;
+typedef __int32 int32_t;
+typedef __int64 int64_t;
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t int_least8_t;
+typedef int16_t int_least16_t;
+typedef int32_t int_least32_t;
+typedef int64_t int_least64_t;
+typedef uint8_t uint_least8_t;
+typedef uint16_t uint_least16_t;
+typedef uint32_t uint_least32_t;
+typedef uint64_t uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t int_fast8_t;
+typedef int16_t int_fast16_t;
+typedef int32_t int_fast32_t;
+typedef int64_t int_fast64_t;
+typedef uint8_t uint_fast8_t;
+typedef uint16_t uint_fast16_t;
+typedef uint32_t uint_fast32_t;
+typedef uint64_t uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+   typedef __int64 intptr_t;
+   typedef unsigned __int64 uintptr_t;
+#else // _WIN64 ][
+   typedef int intptr_t;
+   typedef unsigned int uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t intmax_t;
+typedef uint64_t uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN 
((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // 
_WIN64 ]
+
+#define SIG_ATOMIC_MIN INT_MIN
+#define SIG_ATOMIC_MAX INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN 0
+#endif // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX _UI16_MAX
+#endif // WCHAR_MAX ]
+
+#define WINT_MIN 0
+#define WINT_MAX _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val) val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val) val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C INT64_C
+#define UINTMAX_C UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_proto.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_proto.h
new file mode 100644
index 00000000..6fd6bd4a
--- /dev/null
+++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_proto.h
@@ -0,0 +1,426 @@
+/*! 
+\file gk_proto.h +\brief This file contains function prototypes + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_proto.h 22010 2018-05-14 20:20:26Z karypis $ \endverbatim +*/ + +#ifndef _GK_PROTO_H_ +#define _GK_PROTO_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/*------------------------------------------------------------- + * blas.c + *-------------------------------------------------------------*/ +GK_MKBLAS_PROTO(gk_c, char, int) +GK_MKBLAS_PROTO(gk_i, int, int) +GK_MKBLAS_PROTO(gk_i8, int8_t, int8_t) +GK_MKBLAS_PROTO(gk_i16, int16_t, int16_t) +GK_MKBLAS_PROTO(gk_i32, int32_t, int32_t) +GK_MKBLAS_PROTO(gk_i64, int64_t, int64_t) +GK_MKBLAS_PROTO(gk_z, ssize_t, ssize_t) +GK_MKBLAS_PROTO(gk_zu, size_t, size_t) +GK_MKBLAS_PROTO(gk_f, float, float) +GK_MKBLAS_PROTO(gk_d, double, double) +GK_MKBLAS_PROTO(gk_idx, gk_idx_t, gk_idx_t) + + + + +/*------------------------------------------------------------- + * io.c + *-------------------------------------------------------------*/ +FILE *gk_fopen(char *, char *, const char *); +void gk_fclose(FILE *); +ssize_t gk_read(int fd, void *vbuf, size_t count); +ssize_t gk_write(int fd, void *vbuf, size_t count); +ssize_t gk_getline(char **lineptr, size_t *n, FILE *stream); +char **gk_readfile(char *fname, size_t *r_nlines); +int32_t *gk_i32readfile(char *fname, size_t *r_nlines); +int64_t *gk_i64readfile(char *fname, size_t *r_nlines); +ssize_t *gk_zreadfile(char *fname, size_t *r_nlines); +char *gk_creadfilebin(char *fname, size_t *r_nelmnts); +size_t gk_cwritefilebin(char *fname, size_t n, char *a); +int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts); +size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a); +int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts); +size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a); +ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts); +size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a); +float *gk_freadfilebin(char *fname, 
size_t *r_nelmnts); +size_t gk_fwritefilebin(char *fname, size_t n, float *a); +double *gk_dreadfilebin(char *fname, size_t *r_nelmnts); +size_t gk_dwritefilebin(char *fname, size_t n, double *a); + + + + +/*------------------------------------------------------------- + * fs.c + *-------------------------------------------------------------*/ +int gk_fexists(char *); +int gk_dexists(char *); +ssize_t gk_getfsize(char *); +void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, + size_t *r_max_nlntokens, size_t *r_nbytes); +char *gk_getbasename(char *path); +char *gk_getextname(char *path); +char *gk_getfilename(char *path); +char *gk_getpathname(char *path); +int gk_mkpath(char *); +int gk_rmpath(char *); + + + +/*------------------------------------------------------------- + * memory.c + *-------------------------------------------------------------*/ +GK_MKALLOC_PROTO(gk_c, char) +GK_MKALLOC_PROTO(gk_i, int) +GK_MKALLOC_PROTO(gk_i8, int8_t) +GK_MKALLOC_PROTO(gk_i16, int16_t) +GK_MKALLOC_PROTO(gk_i32, int32_t) +GK_MKALLOC_PROTO(gk_i64, int64_t) +GK_MKALLOC_PROTO(gk_ui8, uint8_t) +GK_MKALLOC_PROTO(gk_ui16, uint16_t) +GK_MKALLOC_PROTO(gk_ui32, uint32_t) +GK_MKALLOC_PROTO(gk_ui64, uint64_t) +GK_MKALLOC_PROTO(gk_z, ssize_t) +GK_MKALLOC_PROTO(gk_zu, size_t) +GK_MKALLOC_PROTO(gk_f, float) +GK_MKALLOC_PROTO(gk_d, double) +GK_MKALLOC_PROTO(gk_idx, gk_idx_t) + +GK_MKALLOC_PROTO(gk_ckv, gk_ckv_t) +GK_MKALLOC_PROTO(gk_ikv, gk_ikv_t) +GK_MKALLOC_PROTO(gk_i8kv, gk_i8kv_t) +GK_MKALLOC_PROTO(gk_i16kv, gk_i16kv_t) +GK_MKALLOC_PROTO(gk_i32kv, gk_i32kv_t) +GK_MKALLOC_PROTO(gk_i64kv, gk_i64kv_t) +GK_MKALLOC_PROTO(gk_zkv, gk_zkv_t) +GK_MKALLOC_PROTO(gk_zukv, gk_zukv_t) +GK_MKALLOC_PROTO(gk_fkv, gk_fkv_t) +GK_MKALLOC_PROTO(gk_dkv, gk_dkv_t) +GK_MKALLOC_PROTO(gk_skv, gk_skv_t) +GK_MKALLOC_PROTO(gk_idxkv, gk_idxkv_t) + +void gk_AllocMatrix(void ***, size_t, size_t , size_t); +void gk_FreeMatrix(void ***, size_t, size_t); +int gk_malloc_init(); +void 
gk_malloc_cleanup(int showstats); +void *gk_malloc(size_t nbytes, char *msg); +void *gk_realloc(void *oldptr, size_t nbytes, char *msg); +void gk_free(void **ptr1,...); +size_t gk_GetCurMemoryUsed(); +size_t gk_GetMaxMemoryUsed(); +void gk_GetVMInfo(size_t *vmsize, size_t *vmrss); +size_t gk_GetProcVmPeak(); + + + +/*------------------------------------------------------------- + * seq.c + *-------------------------------------------------------------*/ +gk_seq_t *gk_seq_ReadGKMODPSSM(char *file_name); +gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet); +void gk_seq_init(gk_seq_t *seq); + + + +/*------------------------------------------------------------- + * error.c + *-------------------------------------------------------------*/ +void gk_set_exit_on_error(int value); +void errexit(char *,...); +void gk_errexit(int signum, char *,...); +int gk_sigtrap(); +int gk_siguntrap(); +void gk_sigthrow(int signum); +void gk_SetSignalHandlers(); +void gk_UnsetSignalHandlers(); +void gk_NonLocalExit_Handler(int signum); +char *gk_strerror(int errnum); +void PrintBackTrace(); + + +/*------------------------------------------------------------- + * util.c + *-------------------------------------------------------------*/ +void gk_RandomPermute(size_t, int *, int); +void gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind); +int gk_log2(int); +int gk_ispow2(int); +float gk_flog2(float); + + +/*------------------------------------------------------------- + * time.c + *-------------------------------------------------------------*/ +gk_wclock_t gk_WClockSeconds(void); +double gk_CPUSeconds(void); + +/*------------------------------------------------------------- + * string.c + *-------------------------------------------------------------*/ +char *gk_strchr_replace(char *str, char *fromlist, char *tolist); +int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options, char **new_str); +char *gk_strtprune(char *, char *); +char 
*gk_strhprune(char *, char *); +char *gk_strtoupper(char *); +char *gk_strtolower(char *); +char *gk_strdup(char *orgstr); +int gk_strcasecmp(char *s1, char *s2); +int gk_strrcmp(char *s1, char *s2); +char *gk_time2str(time_t time); +time_t gk_str2time(char *str); +int gk_GetStringID(gk_StringMap_t *strmap, char *key); + + + +/*------------------------------------------------------------- + * sort.c + *-------------------------------------------------------------*/ +void gk_csorti(size_t, char *); +void gk_csortd(size_t, char *); +void gk_isorti(size_t, int *); +void gk_isortd(size_t, int *); +void gk_i32sorti(size_t, int32_t *); +void gk_i32sortd(size_t, int32_t *); +void gk_i64sorti(size_t, int64_t *); +void gk_i64sortd(size_t, int64_t *); +void gk_ui32sorti(size_t, uint32_t *); +void gk_ui32sortd(size_t, uint32_t *); +void gk_ui64sorti(size_t, uint64_t *); +void gk_ui64sortd(size_t, uint64_t *); +void gk_fsorti(size_t, float *); +void gk_fsortd(size_t, float *); +void gk_dsorti(size_t, double *); +void gk_dsortd(size_t, double *); +void gk_idxsorti(size_t, gk_idx_t *); +void gk_idxsortd(size_t, gk_idx_t *); +void gk_ckvsorti(size_t, gk_ckv_t *); +void gk_ckvsortd(size_t, gk_ckv_t *); +void gk_ikvsorti(size_t, gk_ikv_t *); +void gk_ikvsortd(size_t, gk_ikv_t *); +void gk_i32kvsorti(size_t, gk_i32kv_t *); +void gk_i32kvsortd(size_t, gk_i32kv_t *); +void gk_i64kvsorti(size_t, gk_i64kv_t *); +void gk_i64kvsortd(size_t, gk_i64kv_t *); +void gk_zkvsorti(size_t, gk_zkv_t *); +void gk_zkvsortd(size_t, gk_zkv_t *); +void gk_zukvsorti(size_t, gk_zukv_t *); +void gk_zukvsortd(size_t, gk_zukv_t *); +void gk_fkvsorti(size_t, gk_fkv_t *); +void gk_fkvsortd(size_t, gk_fkv_t *); +void gk_dkvsorti(size_t, gk_dkv_t *); +void gk_dkvsortd(size_t, gk_dkv_t *); +void gk_skvsorti(size_t, gk_skv_t *); +void gk_skvsortd(size_t, gk_skv_t *); +void gk_idxkvsorti(size_t, gk_idxkv_t *); +void gk_idxkvsortd(size_t, gk_idxkv_t *); + + 
+/*------------------------------------------------------------- + * Selection routines + *-------------------------------------------------------------*/ +int gk_dfkvkselect(size_t, int, gk_fkv_t *); +int gk_ifkvkselect(size_t, int, gk_fkv_t *); + + +/*------------------------------------------------------------- + * Priority queue + *-------------------------------------------------------------*/ +GK_MKPQUEUE_PROTO(gk_ipq, gk_ipq_t, int, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_i32pq, gk_i32pq_t, int32_t, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_i64pq, gk_i64pq_t, int64_t, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_fpq, gk_fpq_t, float, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_dpq, gk_dpq_t, double, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_idxpq, gk_idxpq_t, gk_idx_t, gk_idx_t) + + +/*------------------------------------------------------------- + * HTable routines + *-------------------------------------------------------------*/ +gk_HTable_t *HTable_Create(int nelements); +void HTable_Reset(gk_HTable_t *htable); +void HTable_Resize(gk_HTable_t *htable, int nelements); +void HTable_Insert(gk_HTable_t *htable, int key, int val); +void HTable_Delete(gk_HTable_t *htable, int key); +int HTable_Search(gk_HTable_t *htable, int key); +int HTable_GetNext(gk_HTable_t *htable, int key, int *val, int type); +int HTable_SearchAndDelete(gk_HTable_t *htable, int key); +void HTable_Destroy(gk_HTable_t *htable); +int HTable_HFunction(int nelements, int key); + + +/*------------------------------------------------------------- + * Tokenizer routines + *-------------------------------------------------------------*/ +void gk_strtokenize(char *line, char *delim, gk_Tokens_t *tokens); +void gk_freetokenslist(gk_Tokens_t *tokens); + +/*------------------------------------------------------------- + * Encoder/Decoder + *-------------------------------------------------------------*/ +void encodeblock(unsigned char *in, unsigned char *out); +void decodeblock(unsigned char *in, unsigned char *out); +void GKEncodeBase64(int nbytes, 
unsigned char *inbuffer, unsigned char *outbuffer); +void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer); + + +/*------------------------------------------------------------- + * random.c + *-------------------------------------------------------------*/ +GK_MKRANDOM_PROTO(gk_c, size_t, char) +GK_MKRANDOM_PROTO(gk_i, size_t, int) +GK_MKRANDOM_PROTO(gk_i32, size_t, int32_t) +GK_MKRANDOM_PROTO(gk_f, size_t, float) +GK_MKRANDOM_PROTO(gk_d, size_t, double) +GK_MKRANDOM_PROTO(gk_idx, size_t, gk_idx_t) +GK_MKRANDOM_PROTO(gk_z, size_t, ssize_t) +GK_MKRANDOM_PROTO(gk_zu, size_t, size_t) +void gk_randinit(uint64_t); +uint64_t gk_randint64(void); +uint32_t gk_randint32(void); + + +/*------------------------------------------------------------- + * OpenMP fake functions + *-------------------------------------------------------------*/ +#if !defined(__OPENMP__) +void omp_set_num_threads(int num_threads); +int omp_get_num_threads(void); +int omp_get_max_threads(void); +int omp_get_thread_num(void); +int omp_get_num_procs(void); +int omp_in_parallel(void); +void omp_set_dynamic(int num_threads); +int omp_get_dynamic(void); +void omp_set_nested(int nested); +int omp_get_nested(void); +#endif /* __OPENMP__ */ + + +/*------------------------------------------------------------- + * CSR-related functions + *-------------------------------------------------------------*/ +gk_csr_t *gk_csr_Create(); +void gk_csr_Init(gk_csr_t *mat); +void gk_csr_Free(gk_csr_t **mat); +void gk_csr_FreeContents(gk_csr_t *mat); +gk_csr_t *gk_csr_Dup(gk_csr_t *mat); +gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows); +gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind); +gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid); +gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color); +int gk_csr_DetermineFormat(char *filename, int format); +gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering); +void 
gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering); +gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf); +gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction); +gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval); +gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore); +void gk_csr_CompactColumns(gk_csr_t *mat); +void gk_csr_SortIndices(gk_csr_t *mat, int what); +void gk_csr_CreateIndex(gk_csr_t *mat, int what); +void gk_csr_Normalize(gk_csr_t *mat, int what, int norm); +void gk_csr_Scale(gk_csr_t *mat, int type); +void gk_csr_ComputeSums(gk_csr_t *mat, int what); +void gk_csr_ComputeNorms(gk_csr_t *mat, int what); +void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what); +gk_csr_t *gk_csr_Shuffle(gk_csr_t *mat, int what, int summetric); +gk_csr_t *gk_csr_Transpose(gk_csr_t *mat); +float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, int simtype); +float gk_csr_ComputePairSimilarity(gk_csr_t *mat_a, gk_csr_t *mat_b, int i1, int i2, int what, int simtype); +int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, float *qval, + int simtype, int nsim, float minsim, gk_fkv_t *hits, int *_imarker, + gk_fkv_t *i_cand); +int gk_csr_FindConnectedComponents(gk_csr_t *mat, int32_t *cptr, int32_t *cind, + int32_t *cids); +gk_csr_t *gk_csr_MakeSymmetric(gk_csr_t *mat, int op); +gk_csr_t *gk_csr_ReorderSymmetric(gk_csr_t *mat, int32_t *perm, int32_t *iperm); +void gk_csr_ComputeBFSOrderingSymmetric(gk_csr_t *mat, int maxdegree, int v, + int32_t **r_perm, int32_t **r_iperm); +void gk_csr_ComputeBestFOrderingSymmetric(gk_csr_t *mat, int v, int type, + int32_t **r_perm, int32_t **r_iperm); + + +/* itemsets.c */ +void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind, + int minfreq, int maxfreq, int minlen, int maxlen, + void (*process_itemset)(void *stateptr, int nitems, int *itemind, + int ntrans, int *tranind), 
+ void *stateptr); + + +/* evaluate.c */ +float ComputeAccuracy(int n, gk_fkv_t *list); +float ComputeROCn(int n, int maxN, gk_fkv_t *list); +float ComputeMedianRFP(int n, gk_fkv_t *list); +float ComputeMean (int n, float *values); +float ComputeStdDev(int n, float *values); + + +/* mcore.c */ +gk_mcore_t *gk_mcoreCreate(size_t coresize); +gk_mcore_t *gk_gkmcoreCreate(); +void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats); +void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats); +void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes); +void gk_mcorePush(gk_mcore_t *mcore); +void gk_gkmcorePush(gk_mcore_t *mcore); +void gk_mcorePop(gk_mcore_t *mcore); +void gk_gkmcorePop(gk_mcore_t *mcore); +void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr); +void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr); +void gk_mcoreDel(gk_mcore_t *mcore, void *ptr); +void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr); + +/* rw.c */ +int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr); + + +/* graph.c */ +gk_graph_t *gk_graph_Create(); +void gk_graph_Init(gk_graph_t *graph); +void gk_graph_Free(gk_graph_t **graph); +void gk_graph_FreeContents(gk_graph_t *graph); +gk_graph_t *gk_graph_Read(char *filename, int format, int hasvals, + int numbering, int isfewgts, int isfvwgts, int isfvsizes); +void gk_graph_Write(gk_graph_t *graph, char *filename, int format, int numbering); +gk_graph_t *gk_graph_Dup(gk_graph_t *graph); +gk_graph_t *gk_graph_Transpose(gk_graph_t *graph); +gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs); +gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm); +int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind); +void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm, + int32_t **r_iperm); +void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type, + int32_t **r_perm, int32_t 
**r_iperm); +void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type, + int32_t **r_perm, int32_t **r_iperm); +void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps); +void gk_graph_SortAdjacencies(gk_graph_t *graph); +gk_graph_t *gk_graph_MakeSymmetric(gk_graph_t *graph, int op); + + +/* cache.c */ +gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits); +void gk_cacheReset(gk_cache_t *cache); +void gk_cacheDestroy(gk_cache_t **r_cache); +int gk_cacheLoad(gk_cache_t *cache, size_t addr); +double gk_cacheGetHitRate(gk_cache_t *cache); + + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_struct.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_struct.h new file mode 100644 index 00000000..2925e982 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_struct.h @@ -0,0 +1,296 @@ +/*! +\file gk_struct.h +\brief This file contains various datastructures used/provided by GKlib + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_struct.h 21988 2018-04-16 00:11:19Z karypis $ \endverbatim +*/ + +#ifndef _GK_STRUCT_H_ +#define _GK_STRUCT_H_ + + +/********************************************************************/ +/*! 
Generator for gk_??KeyVal_t data structure */ +/********************************************************************/ +#define GK_MKKEYVALUE_T(NAME, KEYTYPE, VALTYPE) \ +typedef struct {\ + KEYTYPE key;\ + VALTYPE val;\ +} NAME;\ + +/* The actual KeyVal data structures */ +GK_MKKEYVALUE_T(gk_ckv_t, char, ssize_t) +GK_MKKEYVALUE_T(gk_ikv_t, int, ssize_t) +GK_MKKEYVALUE_T(gk_i8kv_t, int8_t, ssize_t) +GK_MKKEYVALUE_T(gk_i16kv_t, int16_t, ssize_t) +GK_MKKEYVALUE_T(gk_i32kv_t, int32_t, ssize_t) +GK_MKKEYVALUE_T(gk_i64kv_t, int64_t, ssize_t) +GK_MKKEYVALUE_T(gk_zkv_t, ssize_t, ssize_t) +GK_MKKEYVALUE_T(gk_zukv_t, size_t, ssize_t) +GK_MKKEYVALUE_T(gk_fkv_t, float, ssize_t) +GK_MKKEYVALUE_T(gk_dkv_t, double, ssize_t) +GK_MKKEYVALUE_T(gk_skv_t, char *, ssize_t) +GK_MKKEYVALUE_T(gk_idxkv_t, gk_idx_t, gk_idx_t) + + + +/********************************************************************/ +/*! Generator for gk_?pq_t data structure */ +/********************************************************************/ +#define GK_MKPQUEUE_T(NAME, KVTYPE)\ +typedef struct {\ + size_t nnodes;\ + size_t maxnodes;\ +\ + /* Heap version of the data structure */ \ + KVTYPE *heap;\ + ssize_t *locator;\ +} NAME;\ + +GK_MKPQUEUE_T(gk_ipq_t, gk_ikv_t) +GK_MKPQUEUE_T(gk_i32pq_t, gk_i32kv_t) +GK_MKPQUEUE_T(gk_i64pq_t, gk_i64kv_t) +GK_MKPQUEUE_T(gk_fpq_t, gk_fkv_t) +GK_MKPQUEUE_T(gk_dpq_t, gk_dkv_t) +GK_MKPQUEUE_T(gk_idxpq_t, gk_idxkv_t) + + +#define GK_MKPQUEUE2_T(NAME, KTYPE, VTYPE)\ +typedef struct {\ + ssize_t nnodes;\ + ssize_t maxnodes;\ +\ + /* Heap version of the data structure */ \ + KTYPE *keys;\ + VTYPE *vals;\ +} NAME;\ + + + +/*------------------------------------------------------------- + * The following data structure stores a sparse CSR format + *-------------------------------------------------------------*/ +typedef struct gk_csr_t { + int32_t nrows, ncols; + ssize_t *rowptr, *colptr; + int32_t *rowind, *colind; + int32_t *rowids, *colids; + int32_t *rlabels, *clabels; + int32_t 
*rmap, *cmap; + float *rowval, *colval; + float *rnorms, *cnorms; + float *rsums, *csums; + float *rsizes, *csizes; + float *rvols, *cvols; + float *rwgts, *cwgts; +} gk_csr_t; + + +/*------------------------------------------------------------- + * The following data structure stores a sparse graph + *-------------------------------------------------------------*/ +typedef struct gk_graph_t { + int32_t nvtxs; /*!< The number of vertices in the graph */ + ssize_t *xadj; /*!< The ptr-structure of the adjncy list */ + int32_t *adjncy; /*!< The adjacency list of the graph */ + int32_t *iadjwgt; /*!< The integer edge weights */ + float *fadjwgt; /*!< The floating point edge weights */ + int32_t *ivwgts; /*!< The integer vertex weights */ + float *fvwgts; /*!< The floating point vertex weights */ + int32_t *ivsizes; /*!< The integer vertex sizes */ + float *fvsizes; /*!< The floating point vertex sizes */ + int32_t *vlabels; /*!< The labels of the vertices */ +} gk_graph_t; + + +/*------------------------------------------------------------- + * The following data structure stores a string as a + * pair of its allocated buffer and the buffer itself. 
+ *-------------------------------------------------------------*/ +typedef struct gk_str_t { + size_t len; + char *buf; +} gk_str_t; + + + + +/*------------------------------------------------------------- +* The following data structure implements a string-2-int mapping +* table used for parsing command-line options +*-------------------------------------------------------------*/ +typedef struct gk_StringMap_t { + char *name; + int id; +} gk_StringMap_t; + + +/*------------------------------------------------------------ + * This structure implements a simple hash table + *------------------------------------------------------------*/ +typedef struct gk_HTable_t { + int nelements; /* The overall size of the hash-table */ + int htsize; /* The current size of the hash-table */ + gk_ikv_t *harray; /* The actual hash-table */ +} gk_HTable_t; + + +/*------------------------------------------------------------ + * This structure implements a gk_Tokens_t list returned by the + * string tokenizer + *------------------------------------------------------------*/ +typedef struct gk_Tokens_t { + int ntoks; /* The number of tokens in the input string */ + char *strbuf; /* The memory that stores all the entries */ + char **list; /* Pointers to the strbuf for each element */ +} gk_Tokens_t; + + +/*------------------------------------------------------------ + * This structure implements storage for an atom in a pdb file + *------------------------------------------------------------*/ +typedef struct atom { + int serial; + char *name; + char altLoc; + char *resname; + char chainid; + int rserial; + char icode; + char element; + double x; + double y; + double z; + double opcy; + double tmpt; +} atom; + + +/*------------------------------------------------------------ + * This structure implements storage for a center of mass for + * a single residue. 
+ *------------------------------------------------------------*/ +typedef struct center_of_mass { + char name; + double x; + double y; + double z; +} center_of_mass; + + +/*------------------------------------------------------------ + * This structure implements storage for a pdb protein + *------------------------------------------------------------*/ +typedef struct pdbf { + int natoms; /* Number of atoms */ + int nresidues; /* Number of residues based on coordinates */ + int ncas; + int nbbs; + int corruption; + char *resSeq; /* Residue sequence based on coordinates */ + char **threeresSeq; /* three-letter residue sequence */ + atom *atoms; + atom **bbs; + atom **cas; + center_of_mass *cm; +} pdbf; + + + +/************************************************************* +* Localization Structures for converting characters to integers +**************************************************************/ +typedef struct gk_i2cc2i_t { + int n; + char *i2c; + int *c2i; +} gk_i2cc2i_t; + + +/******************************************************************* + *This structure implements storage of a protein sequence + * *****************************************************************/ +typedef struct gk_seq_t { + + int len; /*Number of Residues */ + int *sequence; /* Stores the sequence*/ + + + int **pssm; /* Stores the pssm matrix */ + int **psfm; /* Stores the psfm matrix */ + char *name; /* Stores the name of the sequence */ + + int nsymbols; + + +} gk_seq_t; + + + + +/*************************************************************************/ +/*! The following data structure stores information about a memory + allocation operation that can either be served from gk_mcore_t or by + a gk_malloc if not sufficient workspace memory is available. 
*/ +/*************************************************************************/ +typedef struct gk_mop_t { + int type; + ssize_t nbytes; + void *ptr; +} gk_mop_t; + + +/*************************************************************************/ +/*! The following structure defines the mcore for GKlib's customized + memory allocations. */ +/*************************************************************************/ +typedef struct gk_mcore_t { + /* Workspace information */ + size_t coresize; /*!< The amount of core memory that has been allocated */ + size_t corecpos; /*!< Index of the first free location in core */ + void *core; /*!< Pointer to the core itself */ + + /* These are for implementing a stack-based allocation scheme using both + core and also dynamically allocated memory */ + size_t nmops; /*!< The number of gk_mop_t entries that have been allocated */ + size_t cmop; /*!< Index of the first free location in mops */ + gk_mop_t *mops; /*!< The array recording the gk_mop_t operations */ + + /* These are for keeping various statistics for wspacemalloc */ + size_t num_callocs; /*!< The number of core mallocs */ + size_t num_hallocs; /*!< The number of heap mallocs */ + size_t size_callocs; /*!< The total # of bytes in core mallocs */ + size_t size_hallocs; /*!< The total # of bytes in heap mallocs */ + size_t cur_callocs; /*!< The current # of bytes in core mallocs */ + size_t cur_hallocs; /*!< The current # of bytes in heap mallocs */ + size_t max_callocs; /*!< The maximum # of bytes in core mallocs at any given time */ + size_t max_hallocs; /*!< The maximum # of bytes in heap mallocs at any given time */ + +} gk_mcore_t; + + +/*************************************************************************/ +/*! The following structure is used for cache simulation for performance + modeling and analysis. */ +/*************************************************************************/ +typedef struct gk_cache_t { + /*! 
The total cache is nway*(2^(cnbits+lnbits)) bytes */ + uint32_t nway; /*!< the associativity of the cache */ + uint32_t lnbits; /*!< the number of address bits indexing the cache line */ + uint32_t cnbits; /*!< the number of address bits indexing the cache */ + size_t csize; /*!< 2^cnbits */ + size_t cmask; /*!< csize-1 */ + + uint64_t clock; /*!< a clock in terms of accesses */ + + uint64_t *latimes; /*!< a cacheline-level last access time */ + size_t *clines; /*!< the cache in terms of cachelines */ + + uint64_t nhits; /*!< counts the number of hits */ + uint64_t nmisses; /*!< counts the number of misses */ +} gk_cache_t; + + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_types.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_types.h new file mode 100644 index 00000000..57c11910 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_types.h @@ -0,0 +1,38 @@ +/*! +\file gk_types.h +\brief This file contains basic scalar datatype used in GKlib + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_types.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#ifndef _GK_TYPES_H_ +#define _GK_TYPES_H_ + +/************************************************************************* +* Basic data type definitions. These definitions allow GKlib to separate +* the following elemental types: +* - loop iterator variables, which are set to size_t +* - signed and unsigned int variables that can be set to any # of bits +* - signed and unsigned long variables that can be set to any # of bits +* - real variables, which can be set to single or double precision. 
+**************************************************************************/ +/*typedef ptrdiff_t gk_idx_t; */ /* index variable */ +typedef ssize_t gk_idx_t; /* index variable */ +typedef int32_t gk_int_t; /* integer values */ +typedef uint32_t gk_uint_t; /* unsigned integer values */ +typedef int64_t gk_long_t; /* long integer values */ +typedef uint64_t gk_ulong_t; /* unsigned long integer values */ +typedef float gk_real_t; /* real type */ +typedef double gk_dreal_t; /* double precision real type */ +typedef double gk_wclock_t; /* wall-clock time */ + +/*#define GK_IDX_MAX PTRDIFF_MAX*/ +#define GK_IDX_MAX ((SIZE_MAX>>1)-2) + +#define PRIGKIDX "zd" +#define SCNGKIDX "zd" + + +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_util.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_util.c new file mode 100644 index 00000000..e1e68db0 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gk_util.c @@ -0,0 +1,107 @@ +/*! +\file gk_util.c +\brief Various utility routines + +\date Started 4/12/2007 +\author George +\version\verbatim $Id: gk_util.c 16223 2014-02-15 21:34:09Z karypis $ \endverbatim +*/ + + +#include + + +/************************************************************************* +* This function randomly permutes the contents of an array. +* flag == 0, don't initialize perm +* flag == 1, set p[i] = i +**************************************************************************/ +void gk_RandomPermute(size_t n, int *p, int flag) +{ + size_t i, u, v; + int tmp; + + if (flag == 1) { + for (i=0; i 1; i++, a = a>>1); + return i-1; +} + + +/************************************************************************* +* This function checks if the argument is a power of 2 +**************************************************************************/ +int gk_ispow2(int a) +{ + return (a == (1<. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* this is for removing a compiler warning */ +void gkfooo() { return; } + +#ifdef USE_GKREGEX + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef _LIBC +/* We have to keep the namespace clean. */ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, 
regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# include "../locale/localeinfo.h" +#endif + +#include "GKlib.h" + + +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ +/* GKINCLUDE #include "regex_internal.h" */ +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#ifndef _REGEX_INTERNAL_H +#define _REGEX_INTERNAL_H 1 + +#include +#include +#include +#include +#include + +#if defined(__MINGW32_VERSION) || defined(_MSC_VER) +#define strcasecmp stricmp +#endif + +#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +# include +#endif +#if defined HAVE_LOCALE_H || defined _LIBC +# include +#endif +#if defined HAVE_WCHAR_H || defined _LIBC +# include +#endif /* HAVE_WCHAR_H || _LIBC */ +#if defined HAVE_WCTYPE_H || defined _LIBC +# include +#endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include +#else +typedef enum { false, true } bool; +#endif /* HAVE_STDBOOL_H || _LIBC */ +#if defined HAVE_STDINT_H || defined _LIBC +# include +#endif /* HAVE_STDINT_H || _LIBC */ +#if defined _LIBC +# include +#else +# define __libc_lock_define(CLASS,NAME) +# define __libc_lock_init(NAME) do { } while (0) +# define __libc_lock_lock(NAME) do { } while (0) +# define __libc_lock_unlock(NAME) do { } while (0) +#endif + +/* In case that the system doesn't have isblank(). */ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif + +#ifdef _LIBC +# ifndef _RE_DEFINE_LOCALE_FUNCTIONS +# define _RE_DEFINE_LOCALE_FUNCTIONS 1 +# include +# include +# include +# endif +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifdef _LIBC +# undef gettext +# define gettext(msgid) \ + INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) +# endif +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +# define gettext_noop(String) String +#endif + +/* For loser systems without the definition. 
*/
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+/* Wide-character / multibyte support is compiled in only when every
+   facility listed below is available, or when building inside glibc.  */
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+/* BE (EXPR, VAL) evaluates to EXPR.  With GCC 3 or later it also tells
+   the compiler that EXPR is expected to equal VAL.  For any other
+   compiler the hint is dropped and the `inline' keyword is defined
+   away.  */
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# define inline
+#endif
+
+/* Number of single byte character.  */
+#define SBC_MAX 256
+
+/* NOTE(review): presumably the maximum byte length of a collating
+   element; it is not referenced in this part of the file -- confirm
+   against its users.  */
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline.  */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Rename to standard API for using out of glibc.  */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define __regfree regfree
+# define attribute_hidden
+#endif /* not _LIBC */
+
+/* __attribute (ARG) expands to a GCC attribute, or to nothing when the
+   compiler is not GCC.  */
+#ifdef __GNUC__
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+/* Error message table and its index array; both are defined outside
+   this part of the file.  */
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* An integer used to represent a set of bits.  It must be unsigned,
+   and must be at least as wide as unsigned int.  */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t.  */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t.  */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set.
*/
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+/* A bit set holding one bit for each of the SBC_MAX single-byte
+   character values.  */
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+/* Single-bit operations on a bit set.  SET and I are parenthesized so
+   that compound expressions (e.g. `ch + 1') can be passed safely.
+   Note that I is still evaluated twice.  */
+#define bitset_set(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] |= (bitset_word_t) 1 << (i) % BITSET_WORD_BITS)
+#define bitset_clear(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+#define bitset_contain(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] & ((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+/* Whole-set operations; each works on exactly sizeof (bitset_t) bytes.  */
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
+
+/* Context constraint bits.  They are combined into the values of
+   re_context_type below.  */
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+
+typedef enum
+{
+  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+  WORD_DELIM = WORD_DELIM_CONSTRAINT,
+  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+} re_context_type;
+
+/* A growable set of integers: ELEMS has room for ALLOC entries, of
+   which NELEM are in use.  */
+typedef struct
+{
+  int alloc;
+  int nelem;
+  int *elems;
+} re_node_set;
+
+typedef enum
+{
+  NON_TYPE = 0,
+
+  /* Node type, These are used by token, node, tree.  */
+  CHARACTER = 1,
+  END_OF_RE = 2,
+  SIMPLE_BRACKET = 3,
+  OP_BACK_REF = 4,
+  OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+  COMPLEX_BRACKET = 6,
+  OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+  /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+     when the debugger shows values of this enum type.  */
+#define EPSILON_BIT 8
+  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+  OP_ALT = EPSILON_BIT | 2,
+  OP_DUP_ASTERISK = EPSILON_BIT | 3,
+  ANCHOR = EPSILON_BIT | 4,
+
+  /* Tree type, these are used only by tree. */
+  CONCAT = 16,
+  SUBEXP = 17,
+
+  /* Token type, these are used only by token.  */
+  OP_DUP_PLUS = 18,
+  OP_DUP_QUESTION,
+  OP_OPEN_BRACKET,
+  OP_CLOSE_BRACKET,
+  OP_CHARSET_RANGE,
+  OP_OPEN_DUP_NUM,
+  OP_CLOSE_DUP_NUM,
+  OP_NON_MATCH_LIST,
+  OP_OPEN_COLL_ELEM,
+  OP_CLOSE_COLL_ELEM,
+  OP_OPEN_EQUIV_CLASS,
+  OP_CLOSE_EQUIV_CLASS,
+  OP_OPEN_CHAR_CLASS,
+  OP_CLOSE_CHAR_CLASS,
+  OP_WORD,
+  OP_NOTWORD,
+  OP_SPACE,
+  OP_NOTSPACE,
+  BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+  /* Multibyte characters.  */
+  wchar_t *mbchars;
+
+  /* Collating symbols.  */
+# ifdef _LIBC
+  int32_t *coll_syms;
+# endif
+
+  /* Equivalence classes. */
+# ifdef _LIBC
+  int32_t *equiv_classes;
+# endif
+
+  /* Range expressions. */
+# ifdef _LIBC
+  uint32_t *range_starts;
+  uint32_t *range_ends;
+# else /* not _LIBC */
+  wchar_t *range_starts;
+  wchar_t *range_ends;
+# endif /* not _LIBC */
+
+  /* Character classes. */
+  wctype_t *char_classes;
+
+  /* If this character set is the non-matching list.  */
+  unsigned int non_match : 1;
+
+  /* # of multibyte characters.  */
+  int nmbchars;
+
+  /* # of collating symbols.  */
+  int ncoll_syms;
+
+  /* # of equivalence classes. */
+  int nequiv_classes;
+
+  /* # of range expressions. */
+  int nranges;
+
+  /* # of character classes. */
+  int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+typedef struct
+{
+  union
+  {
+    unsigned char c;		/* for CHARACTER */
+    re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+    re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+    int idx;			/* for BACK_REF */
+    re_context_type ctx_type;	/* for ANCHOR */
+  } opr;
+#if __GNUC__ >= 2
+  re_token_type_t type : 8;
+#else
+  re_token_type_t type;
+#endif
+  unsigned int constraint : 10;	/* context constraint */
+  unsigned int duplicated : 1;
+  unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+  unsigned int accept_mb : 1;
+  /* These 2 bits can be moved into the union if needed (e.g. if running out
+     of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
+  unsigned int mb_partial : 1;
+#endif
+  unsigned int word_char : 1;
+} re_token_t;
+
+/* Nonzero if TYPE is one of the node types that carry EPSILON_BIT.  */
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+
+struct re_string_t
+{
+  /* Indicate the raw buffer which is the original string passed as an
+     argument of regexec(), re_search(), etc..  */
+  const unsigned char *raw_mbs;
+  /* Store the multibyte string.  In case of "case insensitive mode" like
+     REG_ICASE, upper cases of the string are stored, otherwise MBS points
+     the same address that RAW_MBS points.  */
+  unsigned char *mbs;
+#ifdef RE_ENABLE_I18N
+  /* Store the wide character string which is corresponding to MBS.  */
+  wint_t *wcs;
+  int *offsets;
+  mbstate_t cur_state;
+#endif
+  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
+     raw_mbs[raw_mbs_idx + i].  */
+  int raw_mbs_idx;
+  /* The length of the valid characters in the buffers.  */
+  int valid_len;
+  /* The corresponding number of bytes in raw_mbs array.  */
+  int valid_raw_len;
+  /* The length of the buffers MBS and WCS.  */
+  int bufs_len;
+  /* The index in MBS, which is updated by re_string_fetch_byte.  */
+  int cur_idx;
+  /* length of RAW_MBS array.  */
+  int raw_len;
+  /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
+  int len;
+  /* End of the buffer may be shorter than its length in the cases such
+     as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
+     instead of LEN.  */
+  int raw_stop;
+  /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
+  int stop;
+
+  /* The context of mbs[0].  We store the context independently, since
+     the context of mbs[0] may be different from raw_mbs[0], which is
+     the beginning of the input string.  */
+  unsigned int tip_context;
+  /* The translation passed as a part of an argument of re_compile_pattern.  */
+  RE_TRANSLATE_TYPE trans;
+  /* Copy of re_dfa_t's word_char.  */
+  re_const_bitset_ptr_t word_char;
+  /* 1 if REG_ICASE.  */
+  unsigned char icase;
+  unsigned char is_utf8;
+  unsigned char map_notascii;
+  unsigned char mbs_allocated;
+  unsigned char offsets_needed;
+  unsigned char newline_anchor;
+  unsigned char word_ops_used;
+  int mb_cur_max;
+};
+typedef struct re_string_t re_string_t;
+
+
+struct re_dfa_t;
+typedef struct re_dfa_t re_dfa_t;
+
+/* Calling-convention decoration for the file-local functions: register
+   parameters on i386, nothing elsewhere.  */
+#ifndef _LIBC
+# ifdef __i386__
+#  define internal_function   __attribute ((regparm (3), stdcall))
+# else
+#  define internal_function
+# endif
+#endif
+
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+						int new_buf_len)
+     internal_function;
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr) internal_function;
+static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr) internal_function;
+static void re_string_translate_buffer (re_string_t *pstr) internal_function;
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+					  int eflags)
+     internal_function __attribute ((pure));
+/* Accessors for re_string_t.  They are macros, so PSTR may be evaluated
+   more than once.  */
+#define re_string_peek_byte(pstr, offset) \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
+#define re_string_fetch_byte(pstr) \
+  ((pstr)->mbs[(pstr)->cur_idx++])
+#define re_string_first_byte(pstr, idx) \
+  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
+#define re_string_is_single_byte_char(pstr, idx) \
+  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
+				|| (pstr)->wcs[(idx) + 1] != WEOF))
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+/* Select an alloca implementation; the build is rejected when the
+   compiler is neither GCC nor MSVC.  */
+#ifdef __GNUC__
+# define alloca(size)   __builtin_alloca (size)
+# define HAVE_ALLOCA 1
+#elif defined(_MSC_VER)
+# include <malloc.h>
+# define alloca _alloca
+# define HAVE_ALLOCA 1
+#else
+# error No alloca()
+#endif
+
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+   and a page size can be as small as 4096 bytes.  So we cannot safely
+   allocate anything larger than 4096 bytes.  Also care for the possibility
+   of a few compiler-allocated temporary stack slots.  */
+#  define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* alloca is implemented with malloc, so just use malloc.  */
+#  define __libc_use_alloca(n) 0
+# endif
+#endif
+
+/* Typed allocation wrappers.  N * sizeof (T) is not checked for
+   overflow here; callers are responsible for bounding N.  */
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_free(p) free (p)
+
+struct bin_tree_t
+{
+  struct bin_tree_t *parent;
+  struct bin_tree_t *left;
+  struct bin_tree_t *right;
+  struct bin_tree_t *first;
+  struct bin_tree_t *next;
+
+  re_token_t token;
+
+  /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+     Otherwise `type' indicate the type of this node.  */
+  int node_idx;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+/* Number of tree nodes that fit in one storage chunk of roughly 1024
+   bytes, after the chunk's `next' pointer.  */
+#define BIN_TREE_STORAGE_SIZE \
+  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+  struct bin_tree_storage_t *next;
+  bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+/* Context flag bits and their predicates.  */
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+/* Nonzero if CONTEXT violates one of the PREV_* bits set in CONSTRAINT.
+   Both arguments are parenthesized at every use.  */
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+/* Nonzero if CONTEXT violates one of the NEXT_* bits set in CONSTRAINT.  */
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+struct re_dfastate_t
+{
+  unsigned int hash;
+  re_node_set nodes;
+  re_node_set non_eps_nodes;
+  re_node_set inveclosure;
+  re_node_set *entrance_nodes;
+  struct re_dfastate_t **trtable, **word_trtable;
+  unsigned int context : 4;
+  unsigned int halt : 1;
+  /* If this state can accept `multi byte'.
+     Note that we refer to multibyte characters, and multi character
+     collating elements as `multi byte'.  */
+  unsigned int accept_mb : 1;
+  /* If this state has backreference node(s).  */
+  unsigned int has_backref : 1;
+  unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+struct re_state_table_entry
+{
+  int num;
+  int alloc;
+  re_dfastate_t **array;
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
+
+typedef struct
+{
+  int next_idx;
+  int alloc;
+  re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
+
+typedef struct
+{
+  int node;
+  int str_idx; /* The position NODE match at.  */
+  state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+   And information about the node, whose type is OP_CLOSE_SUBEXP,
+   corresponding to NODE is stored in LASTS.  */
+
+typedef struct
+{
+  int str_idx;
+  int node;
+  state_array_t *path;
+  int alasts; /* Allocation size of LASTS.  */
+  int nlasts; /* The number of LASTS.  */
+  re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+  int node;
+  int str_idx;
+  int subexp_from;
+  int subexp_to;
+  char more;
+  char unused;
+  unsigned short int eps_reachable_subexps_map;
+};
+
+typedef struct
+{
+  /* The string object corresponding to the input string.  */
+  re_string_t input;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+  const re_dfa_t *const dfa;
+#else
+  const re_dfa_t *dfa;
+#endif
+  /* EFLAGS of the argument of regexec.  */
+  int eflags;
+  /* Where the matching ends.  */
+  int match_last;
+  int last_node;
+  /* The state log used by the matcher.  */
+  re_dfastate_t **state_log;
+  int state_log_top;
+  /* Back reference cache.  */
+  int nbkref_ents;
+  int abkref_ents;
+  struct re_backref_cache_entry *bkref_ents;
+  int max_mb_elem_len;
+  int nsub_tops;
+  int asub_tops;
+  re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **limited_states;
+  int last_node;
+  int last_str_idx;
+  re_node_set limits;
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+  int idx;
+  int node;
+  regmatch_t *regs;
+  re_node_set eps_via_nodes;
+};
+
+struct re_fail_stack_t
+{
+  int num;
+  int alloc;
+  struct re_fail_stack_ent_t *stack;
+};
+
+struct re_dfa_t
+{
+  re_token_t *nodes;
+  size_t nodes_alloc;
+  size_t nodes_len;
+  int *nexts;
+  int *org_indices;
+  re_node_set *edests;
+  re_node_set *eclosures;
+  re_node_set *inveclosures;
+  struct re_state_table_entry *state_table;
+  re_dfastate_t *init_state;
+  re_dfastate_t *init_state_word;
+  re_dfastate_t *init_state_nl;
+  re_dfastate_t *init_state_begbuf;
+  bin_tree_t *str_tree;
+  bin_tree_storage_t *str_tree_storage;
+  re_bitset_ptr_t sb_char;
+  int str_tree_storage_idx;
+
+  /* number of subexpressions `re_nsub' is in regex_t.  */
+  unsigned int state_hash_mask;
+  int init_node;
+  int nbackref; /* The number of backreference in this dfa.  */
+
+  /* Bitmap expressing which backreference is used.  */
+  bitset_word_t used_bkref_map;
+  bitset_word_t completed_bkref_map;
+
+  unsigned int has_plural_match : 1;
+  /* If this dfa has "multibyte node", which is a backreference or
+     a node which can accept multibyte character or multi character
+     collating element.  */
+  unsigned int has_mb_node : 1;
+  unsigned int is_utf8 : 1;
+  unsigned int map_notascii : 1;
+  unsigned int word_ops_used : 1;
+  int mb_cur_max;
+  bitset_t word_char;
+  reg_syntax_t syntax;
+  int *subexp_map;
+#ifdef DEBUG
+  char* re_str;
+#endif
+  __libc_lock_define (, lock)
+};
+
+/* Convenience operations on re_node_set.  re_node_set_free releases
+   only the element array, not the set object itself.  */
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+#define re_node_set_remove(set,id) \
+  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+#define re_node_set_empty(p) ((p)->nelem = 0)
+#define re_node_set_free(set) re_free ((set)->elems)
+
+
+typedef enum
+{
+  SB_CHAR,
+  MB_CHAR,
+  EQUIV_CLASS,
+  COLL_SYM,
+  CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+  bracket_elem_type type;
+  union
+  {
+    unsigned char ch;
+    unsigned char *name;
+    wchar_t wch;
+  } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset operation.  */
+
+/* Complement every word of SET in place.  */
+static inline void
+bitset_not (bitset_t set)
+{
+  int bitset_i;
+  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
+    set[bitset_i] = ~set[bitset_i];
+}
+
+/* DEST |= SRC, word by word.  */
+static inline void
+bitset_merge (bitset_t dest, const bitset_t src)
+{
+  int bitset_i;
+  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
+    dest[bitset_i] |= src[bitset_i];
+}
+
+/* DEST &= SRC, word by word.  */
+static inline void
+bitset_mask (bitset_t dest, const bitset_t src)
+{
+  int bitset_i;
+  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
+    dest[bitset_i] &= src[bitset_i];
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string.
*/
+
+/* Return the number of bytes occupied by the character that starts at
+   byte index IDX of PSTR.  Always 1 in a single-byte locale; otherwise
+   the distance to the next position whose WCS entry is not the WEOF
+   padding, limited by VALID_LEN.  */
+static inline int
+internal_function __attribute ((pure))
+re_string_char_size_at (const re_string_t *pstr, int idx)
+{
+  int byte_idx;
+  if (pstr->mb_cur_max == 1)
+    return 1;
+  for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
+    if (pstr->wcs[idx + byte_idx] != WEOF)
+      break;
+  return byte_idx;
+}
+
+/* Return the wide character at byte index IDX of PSTR.  In a
+   single-byte locale this is the byte itself; otherwise it is the WCS
+   entry, which is WEOF at padding positions.  */
+static inline wint_t
+internal_function __attribute ((pure))
+re_string_wchar_at (const re_string_t *pstr, int idx)
+{
+  if (pstr->mb_cur_max == 1)
+    return (wint_t) pstr->mbs[idx];
+  return (wint_t) pstr->wcs[idx];
+}
+
+/* Return the byte length of the collating element that starts at byte
+   index IDX of PSTR.  Outside glibc, or when the locale has no
+   collation rules, this is always 1.  */
+static int
+internal_function __attribute ((pure))
+re_string_elem_size_at (const re_string_t *pstr, int idx)
+{
+# ifdef _LIBC
+  const unsigned char *p, *extra;
+  const int32_t *table, *indirect;
+  int32_t tmp;
+  /* The header included below is glibc-internal.  NOTE(review): it
+     presumably supplies findidx (), which uses the locals declared
+     above -- confirm against the glibc sources.  */
+#  include <locale/weight.h>
+  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+  if (nrules != 0)
+    {
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      extra = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						_NL_COLLATE_INDIRECTMB);
+      p = pstr->mbs + idx;
+      tmp = findidx (&p);
+      /* findidx advanced P; the distance moved is the element size.  */
+      return p - pstr->mbs - idx;
+    }
+  else
+# endif /* _LIBC */
+    return 1;
+}
+#endif /* RE_ENABLE_I18N */
+
+#endif /*  _REGEX_INTERNAL_H */
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regex_internal.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+static void re_string_construct_common (const char *str, int len,
+					re_string_t *pstr,
+					RE_TRANSLATE_TYPE trans, int icase,
+					const re_dfa_t *dfa) internal_function;
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int hash) internal_function;
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int context,
+					  unsigned int hash) internal_function;
+
+/* Functions for string operation.  */
+
+/* Set up PSTR for the LEN-byte string STR and size its buffers, without
+   filling them: re_string_reconstruct has to be called before the
+   object is used.  The buffers get room for INIT_LEN bytes, raised to
+   at least one character (DFA->mb_cur_max) and capped at LEN + 1.
+   Returns REG_NOERROR, or the error reported by the buffer
+   allocation.  */
+
+static reg_errcode_t
+internal_function
+re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+		    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t status;
+  int wanted_len = init_len;
+
+  /* Room for at least one character, but never more than LEN + 1.  */
+  if (wanted_len < dfa->mb_cur_max)
+    wanted_len = dfa->mb_cur_max;
+  if (len + 1 < wanted_len)
+    wanted_len = len + 1;
+
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  status = re_string_realloc_buffers (pstr, wanted_len);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  pstr->word_char = dfa->word_char;
+  pstr->word_ops_used = dfa->word_ops_used;
+
+  /* Without a private copy, MBS aliases the caller's string.  */
+  if (!pstr->mbs_allocated)
+    pstr->mbs = (unsigned char *) str;
+
+  /* Nothing is valid yet when the bytes still have to be converted.  */
+  if (pstr->mbs_allocated || dfa->mb_cur_max > 1)
+    pstr->valid_len = 0;
+  else
+    pstr->valid_len = len;
+  pstr->valid_raw_len = pstr->valid_len;
+
+  return REG_NOERROR;
+}
+
+/* Zero PSTR, set it up for the LEN-byte string STR, allocate its
+   buffers and fill them (wide-character conversion, upper-casing for
+   ICASE, translation through TRANS, as applicable).  Returns
+   REG_NOERROR, or the error of the first step that failed.  */
+
+static reg_errcode_t
+internal_function
+re_string_construct (re_string_t *pstr, const char *str, int len,
+		     RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t status;
+
+  memset (pstr, '\0', sizeof (re_string_t));
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  if (len > 0)
+    {
+      status = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (status != REG_NOERROR, 0))
+	return status;
+    }
+  if (!pstr->mbs_allocated)
+    pstr->mbs = (unsigned char *) str;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      if (!icase)
+	{
+	  build_wcs_buffer (pstr);
+	  return REG_NOERROR;
+	}
+
+      /* Upper-casing may change a character's byte length, so keep
+	 doubling the buffers until the whole input has been converted
+	 or there is spare room for one more character.  */
+      for (;;)
+	{
+	  status = build_wcs_upper_buffer (pstr);
+	  if (BE (status != REG_NOERROR, 0))
+	    return status;
+	  if (pstr->valid_raw_len >= len
+	      || pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
+	    return REG_NOERROR;
+	  status = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+	  if (BE (status != REG_NOERROR, 0))
+	    return status;
+	}
+    }
+#endif /* RE_ENABLE_I18N  */
+
+  /* Single-byte locale.  */
+  if (icase)
+    build_upper_buffer (pstr);
+  else if (trans != NULL)
+    re_string_translate_buffer (pstr);
+  else
+    {
+      /* MBS aliases the input, so everything is valid already.  */
+      pstr->valid_len = pstr->bufs_len;
+      pstr->valid_raw_len = pstr->bufs_len;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions for re_string_allocate, and re_string_construct.
*/
+
+/* Resize the buffers of PSTR to NEW_BUF_LEN elements: WCS (and OFFSETS,
+   if it exists) in a multibyte locale, and MBS when PSTR owns it.
+   Returns REG_NOERROR on success.  Returns REG_ESPACE if NEW_BUF_LEN is
+   negative, if the byte size would not fit in a size_t, or if realloc
+   fails; in that case BUFS_LEN is left unchanged and every buffer
+   pointer stored in PSTR is still valid.  */
+
+static reg_errcode_t
+internal_function
+re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
+{
+  /* A negative length would turn into a huge size_t below, and would be
+     stored unchecked in BUFS_LEN when nothing needs reallocating.  */
+  if (BE (new_buf_len < 0, 0))
+    return REG_ESPACE;
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      wint_t *new_wcs;
+      /* Largest element reallocated here (wint_t for WCS, int for
+	 OFFSETS).  Refuse lengths whose byte size would wrap around in
+	 re_realloc's multiplication and yield an undersized buffer.  */
+      const size_t max_elem_size
+	= sizeof (wint_t) > sizeof (int) ? sizeof (wint_t) : sizeof (int);
+
+      if (BE (SIZE_MAX / max_elem_size < (size_t) new_buf_len, 0))
+	return REG_ESPACE;
+
+      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
+      if (BE (new_wcs == NULL, 0))
+	return REG_ESPACE;
+      pstr->wcs = new_wcs;
+      if (pstr->offsets != NULL)
+	{
+	  int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
+	  if (BE (new_offsets == NULL, 0))
+	    return REG_ESPACE;
+	  pstr->offsets = new_offsets;
+	}
+    }
+#endif /* RE_ENABLE_I18N  */
+  if (pstr->mbs_allocated)
+    {
+      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
+					   new_buf_len);
+      if (BE (new_mbs == NULL, 0))
+	return REG_ESPACE;
+      pstr->mbs = new_mbs;
+    }
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+
+/* Initialize the fields of PSTR that depend only on the arguments: the
+   raw string and its length, the translation table, the case flag and
+   the locale properties copied from DFA.  No memory is allocated.
+   PSTR gets a private MBS buffer (mbs_allocated) exactly when a
+   translation table is given or ICASE is set.  */
+
+static void
+internal_function
+re_string_construct_common (const char *str, int len, re_string_t *pstr,
+			    RE_TRANSLATE_TYPE trans, int icase,
+			    const re_dfa_t *dfa)
+{
+  pstr->raw_mbs = (const unsigned char *) str;
+  pstr->len = len;
+  pstr->raw_len = len;
+  pstr->trans = trans;
+  pstr->icase = icase ? 1 : 0;
+  pstr->mbs_allocated = (trans != NULL || icase);
+  pstr->mb_cur_max = dfa->mb_cur_max;
+  pstr->is_utf8 = dfa->is_utf8;
+  pstr->map_notascii = dfa->map_notascii;
+  pstr->stop = pstr->len;
+  pstr->raw_stop = pstr->stop;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+   If the byte sequence of the string are:
+     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+   Then wide character buffer will be:
+     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
+   We use WEOF for padding, they indicate that the position isn't
+   a first byte of a multibyte character.
+
+   Note that this function assumes PSTR->VALID_LEN elements are already
+   built and starts from PSTR->VALID_LEN.
*/
+
+static void
+internal_function
+build_wcs_buffer (re_string_t *pstr)
+{
+#ifdef _LIBC
+  unsigned char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  unsigned char buf[64];
+#endif
+  mbstate_t prev_st;
+  int byte_idx, end_idx, remain_len;
+  size_t mbclen;
+
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+    {
+      wchar_t wc;
+      const char *p;
+
+      remain_len = end_idx - byte_idx;
+      prev_st = pstr->cur_state;
+      /* Apply the translation if we need.  */
+      if (BE (pstr->trans != NULL, 0))
+	{
+	  int i, ch;
+
+	  /* Translate up to one character's worth of bytes into BUF,
+	     storing the translated bytes in MBS as well.  */
+	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	    {
+	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
+	    }
+	  p = (const char *) buf;
+	}
+      else
+	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+      mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -1 || mbclen == 0
+	      || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
+	{
+	  /* An invalid sequence, a '\0', or an incomplete sequence at
+	     the very end of the string (the buffers already cover the
+	     whole string, so no further bytes can ever complete it).
+	     We treat these cases as a singlebyte character.  Without
+	     the last case VALID_LEN could never reach LEN.  */
+	  mbclen = 1;
+	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	  if (BE (pstr->trans != NULL, 0))
+	    wc = pstr->trans[wc];
+	  pstr->cur_state = prev_st;
+	}
+      else if (BE (mbclen == (size_t) -2, 0))
+	{
+	  /* The buffer doesn't have enough space, finish to build.  */
+	  pstr->cur_state = prev_st;
+	  break;
+	}
+
+      /* Write wide character and padding.  */
+      pstr->wcs[byte_idx++] = wc;
+      /* Write paddings.  */
+      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	pstr->wcs[byte_idx++] = WEOF;
+    }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+   but for REG_ICASE.
*/
+
+/* Starts from PSTR->VALID_LEN and writes the upper-cased bytes to MBS
+   and the wide characters to WCS.  When the upper-case form of a
+   character has a different byte length than the original, the OFFSETS
+   array is created to map buffer positions back to raw positions.
+   Returns REG_NOERROR, or REG_ESPACE if OFFSETS cannot be allocated.  */
+
+static reg_errcode_t
+internal_function
+build_wcs_upper_buffer (re_string_t *pstr)
+{
+  mbstate_t prev_st;
+  int src_idx, byte_idx, end_idx, remain_len;
+  size_t mbclen;
+#ifdef _LIBC
+  char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  char buf[64];
+#endif
+
+  byte_idx = pstr->valid_len;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  /* The following optimization assumes that ASCII characters can be
+     mapped to wide characters with a simple cast.  */
+  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+    {
+      while (byte_idx < end_idx)
+	{
+	  wchar_t wc;
+
+	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+	      && mbsinit (&pstr->cur_state))
+	    {
+	      /* In case of a singlebyte character.  */
+	      pstr->mbs[byte_idx]
+		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+	      /* The next step uses the assumption that wchar_t is encoded
+		 ASCII-safe: all ASCII values can be converted like this.  */
+	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+	      ++byte_idx;
+	      continue;
+	    }
+
+	  remain_len = end_idx - byte_idx;
+	  prev_st = pstr->cur_state;
+	  mbclen = mbrtowc (&wc,
+			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+			     + byte_idx), remain_len, &pstr->cur_state);
+	  /* True unless MBCLEN is 0, (size_t) -1 or (size_t) -2.  */
+	  if (BE (mbclen + 2 > 2, 1))
+	    {
+	      wchar_t wcu = wc;
+	      if (iswlower (wc))
+		{
+		  size_t mbcdlen;
+
+		  wcu = towupper (wc);
+		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
+		  if (BE (mbclen == mbcdlen, 1))
+		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		  else
+		    {
+		      /* The length changed: continue in the general loop
+			 below, which maintains OFFSETS.  */
+		      src_idx = byte_idx;
+		      goto offsets_needed;
+		    }
+		}
+	      else
+		memcpy (pstr->mbs + byte_idx,
+			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+	      pstr->wcs[byte_idx++] = wcu;
+	      /* Write paddings.  */
+	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+		pstr->wcs[byte_idx++] = WEOF;
+	    }
+	  else if (mbclen == (size_t) -1 || mbclen == 0)
+	    {
+	      /* It is an invalid character or '\0'.  Just use the byte.  */
+	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	      pstr->mbs[byte_idx] = ch;
+	      /* And also cast it to wide char.  */
+	      pstr->wcs[byte_idx++] = (wchar_t) ch;
+	      if (BE (mbclen == (size_t) -1, 0))
+		pstr->cur_state = prev_st;
+	    }
+	  else
+	    {
+	      /* The buffer doesn't have enough space, finish to build.  */
+	      pstr->cur_state = prev_st;
+	      break;
+	    }
+	}
+      pstr->valid_len = byte_idx;
+      pstr->valid_raw_len = byte_idx;
+      return REG_NOERROR;
+    }
+  else
+    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
+      {
+	wchar_t wc;
+	const char *p;
+      offsets_needed:
+	remain_len = end_idx - byte_idx;
+	prev_st = pstr->cur_state;
+	if (BE (pstr->trans != NULL, 0))
+	  {
+	    int i, ch;
+
+	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	      {
+		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+		buf[i] = pstr->trans[ch];
+	      }
+	    p = (const char *) buf;
+	  }
+	else
+	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+	if (BE (mbclen + 2 > 2, 1))
+	  {
+	    wchar_t wcu = wc;
+	    if (iswlower (wc))
+	      {
+		size_t mbcdlen;
+
+		wcu = towupper (wc);
+		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
+		if (BE (mbclen == mbcdlen, 1))
+		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		else if (mbcdlen != (size_t) -1)
+		  {
+		    size_t i;
+
+		    if (byte_idx + mbcdlen > pstr->bufs_len)
+		      {
+			pstr->cur_state = prev_st;
+			break;
+		      }
+
+		    if (pstr->offsets == NULL)
+		      {
+			pstr->offsets = re_malloc (int, pstr->bufs_len);
+
+			if (pstr->offsets == NULL)
+			  return REG_ESPACE;
+		      }
+		    if (!pstr->offsets_needed)
+		      {
+			/* Up to here buffer and raw positions coincide.  */
+			for (i = 0; i < (size_t) byte_idx; ++i)
+			  pstr->offsets[i] = i;
+			pstr->offsets_needed = 1;
+		      }
+
+		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+		    pstr->wcs[byte_idx] = wcu;
+		    pstr->offsets[byte_idx] = src_idx;
+		    for (i = 1; i < mbcdlen; ++i)
+		      {
+			pstr->offsets[byte_idx + i]
+			  = src_idx + (i < mbclen ? i : mbclen - 1);
+			pstr->wcs[byte_idx + i] = WEOF;
+		      }
+		    /* The buffer view grows or shrinks by the difference
+		       between the two encodings.  */
+		    pstr->len += mbcdlen - mbclen;
+		    if (pstr->raw_stop > src_idx)
+		      pstr->stop += mbcdlen - mbclen;
+		    end_idx = (pstr->bufs_len > pstr->len)
+			      ? pstr->len : pstr->bufs_len;
+		    byte_idx += mbcdlen;
+		    src_idx += mbclen;
+		    continue;
+		  }
+		else
+		  memcpy (pstr->mbs + byte_idx, p, mbclen);
+	      }
+	    else
+	      memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      {
+		size_t i;
+		for (i = 0; i < mbclen; ++i)
+		  pstr->offsets[byte_idx + i] = src_idx + i;
+	      }
+	    src_idx += mbclen;
+
+	    pstr->wcs[byte_idx++] = wcu;
+	    /* Write paddings.  */
+	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	      pstr->wcs[byte_idx++] = WEOF;
+	  }
+	else if (mbclen == (size_t) -1 || mbclen == 0)
+	  {
+	    /* It is an invalid character or '\0'.  Just use the byte.  */
+	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+	    if (BE (pstr->trans != NULL, 0))
+	      ch = pstr->trans [ch];
+	    pstr->mbs[byte_idx] = ch;
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      pstr->offsets[byte_idx] = src_idx;
+	    ++src_idx;
+
+	    /* And also cast it to wide char.  */
+	    pstr->wcs[byte_idx++] = (wchar_t) ch;
+	    if (BE (mbclen == (size_t) -1, 0))
+	      pstr->cur_state = prev_st;
+	  }
+	else
+	  {
+	    /* The buffer doesn't have enough space, finish to build.  */
+	    pstr->cur_state = prev_st;
+	    break;
+	  }
+      }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = src_idx;
+  return REG_NOERROR;
+}
+
+/* Skip characters until the index becomes greater than NEW_RAW_IDX.
+   Return the index.
+
+   Scanning starts right after the part of RAW_MBS that has already
+   been converted.  Both the loop index and the return value are
+   absolute indices into RAW_MBS.  *LAST_WC receives the last character
+   skipped, or WEOF if nothing was skipped.  */
+
+static int
+internal_function
+re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
+{
+  mbstate_t prev_st;
+  int rawbuf_idx;
+  size_t mbclen;
+  /* Kept as wint_t: WEOF is a wint_t value and need not be
+     representable in wchar_t.  */
+  wint_t wc = WEOF;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      wchar_t wc2;
+      /* RAWBUF_IDX is an absolute index into RAW_MBS, so the bytes that
+	 remain must be measured against RAW_LEN.  PSTR->len is relative
+	 to RAW_MBS_IDX and would under-count them, possibly going
+	 negative and becoming a huge size_t in the call below.  */
+      int remain_len = pstr->raw_len - rawbuf_idx;
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+			remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+	{
+	  /* We treat these cases as a single byte character.  */
+	  if (mbclen == 0 || remain_len == 0)
+	    wc = L'\0';
+	  else
+	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
+	  mbclen = 1;
+	  pstr->cur_state = prev_st;
+	}
+      else
+	wc = wc2;
+      /* Then proceed the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N  */
+
+/* Build the buffer PSTR->MBS, and apply the translation if we need.
+   This function is used in case of REG_ICASE.
+
+   Single-byte variant: each raw byte from VALID_LEN up to the smaller
+   of BUFS_LEN and LEN is translated through TRANS (if set) and then
+   upper-cased.  */
+
+static void
+internal_function
+build_upper_buffer (re_string_t *pstr)
+{
+  int char_idx, end_idx;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
+      if (BE (pstr->trans != NULL, 0))
+	ch = pstr->trans[ch];
+      if (islower (ch))
+	pstr->mbs[char_idx] = toupper (ch);
+      else
+	pstr->mbs[char_idx] = ch;
+    }
+  pstr->valid_len = char_idx;
+  pstr->valid_raw_len = char_idx;
+}
+
+/* Apply TRANS to the buffer in PSTR.
+
+   Each raw byte from VALID_LEN up to the smaller of BUFS_LEN and LEN is
+   mapped through PSTR->trans into MBS.  PSTR->trans is dereferenced
+   unconditionally, so it must not be NULL.  */
+
+static void
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
+{
+  int buf_idx, end_idx;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
+      pstr->mbs[buf_idx] = pstr->trans[ch];
+    }
+
+  pstr->valid_len = buf_idx;
+  pstr->valid_raw_len = buf_idx;
+}
+
+/* This function re-construct the buffers.
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1, + convert to upper case in case of REG_ICASE, apply translation. */ + +static reg_errcode_t +internal_function +re_string_reconstruct (re_string_t *pstr, int idx, int eflags) +{ + int offset = idx - pstr->raw_mbs_idx; + if (BE (offset < 0, 0)) + { + /* Reset buffer. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#endif /* RE_ENABLE_I18N */ + pstr->len = pstr->raw_len; + pstr->stop = pstr->raw_stop; + pstr->valid_len = 0; + pstr->raw_mbs_idx = 0; + pstr->valid_raw_len = 0; + pstr->offsets_needed = 0; + pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (!pstr->mbs_allocated) + pstr->mbs = (unsigned char *) pstr->raw_mbs; + offset = idx; + } + + if (BE (offset != 0, 1)) + { + /* Should the already checked characters be kept? */ + if (BE (offset < pstr->valid_raw_len, 1)) + { + /* Yes, move them to the front of the buffer. */ +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + int low = 0, high = pstr->valid_len, mid; + do + { + mid = (high + low) / 2; + if (pstr->offsets[mid] > offset) + high = mid; + else if (pstr->offsets[mid] < offset) + low = mid + 1; + else + break; + } + while (low < high); + if (pstr->offsets[mid] < offset) + ++mid; + pstr->tip_context = re_string_context_at (pstr, mid - 1, + eflags); + /* This can be quite complicated, so handle specially + only the common and easy case where the character with + different length representation of lower and upper + case is present at or after offset. 
*/ + if (pstr->valid_len > offset + && mid == offset && pstr->offsets[mid] == offset) + { + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); + memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; + for (low = 0; low < pstr->valid_len; low++) + pstr->offsets[low] = pstr->offsets[low + offset] - offset; + } + else + { + /* Otherwise, just find out how long the partial multibyte + character at offset is and fill it with WEOF/255. */ + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + while (mid > 0 && pstr->offsets[mid - 1] == offset) + --mid; + while (mid < pstr->valid_len) + if (pstr->wcs[mid] != WEOF) + break; + else + ++mid; + if (mid == pstr->valid_len) + pstr->valid_len = 0; + else + { + pstr->valid_len = pstr->offsets[mid] - offset; + if (pstr->valid_len) + { + for (low = 0; low < pstr->valid_len; ++low) + pstr->wcs[low] = WEOF; + memset (pstr->mbs, 255, pstr->valid_len); + } + } + pstr->valid_raw_len = pstr->valid_len; + } + } + else +#endif + { + pstr->tip_context = re_string_context_at (pstr, offset - 1, + eflags); +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + memmove (pstr->mbs, pstr->mbs + offset, + pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; +#if DEBUG + assert (pstr->valid_len > 0); +#endif + } + } + else + { + /* No, skip all characters until IDX. 
*/ + int prev_valid_len = pstr->valid_len; + +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + } +#endif + pstr->valid_len = 0; +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + int wcs_idx; + wint_t wc = WEOF; + + if (pstr->is_utf8) + { + const unsigned char *raw, *p, *q, *end; + + /* Special case UTF-8. Multi-byte chars start with any + byte other than 0x80 - 0xbf. */ + raw = pstr->raw_mbs + pstr->raw_mbs_idx; + end = raw + (offset - pstr->mb_cur_max); + if (end < pstr->raw_mbs) + end = pstr->raw_mbs; + p = raw + offset - 1; +#ifdef _LIBC + /* We know the wchar_t encoding is UCS4, so for the simple + case, ASCII characters, skip the conversion step. */ + if (isascii (*p) && BE (pstr->trans == NULL, 1)) + { + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + /* pstr->valid_len = 0; */ + wc = (wchar_t) *p; + } + else +#endif + for (; p >= end; --p) + if ((*p & 0xc0) != 0x80) + { + mbstate_t cur_state; + wchar_t wc2; + int mlen = raw + pstr->len - p; + unsigned char buf[6]; + size_t mbclen; + + q = p; + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + q = buf; + } + /* XXX Don't use mbrtowc, we know which conversion + to use (UTF-8 -> UCS4). */ + memset (&cur_state, 0, sizeof (cur_state)); + mbclen = mbrtowc (&wc2, (const char *) p, mlen, + &cur_state); + if (raw + offset - p <= mbclen + && mbclen < (size_t) -2) + { + memset (&pstr->cur_state, '\0', + sizeof (mbstate_t)); + pstr->valid_len = mbclen - (raw + offset - p); + wc = wc2; + } + break; + } + } + + if (wc == WEOF) + pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + if (wc == WEOF) + pstr->tip_context + = re_string_context_at (pstr, prev_valid_len - 1, eflags); + else + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? 
CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + if (BE (pstr->valid_len, 0)) + { + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; + if (pstr->mbs_allocated) + memset (pstr->mbs, 255, pstr->valid_len); + } + pstr->valid_raw_len = pstr->valid_len; + } + else +#endif /* RE_ENABLE_I18N */ + { + int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; + pstr->valid_raw_len = 0; + if (pstr->trans) + c = pstr->trans[c]; + pstr->tip_context = (bitset_contain (pstr->word_char, c) + ? CONTEXT_WORD + : ((IS_NEWLINE (c) && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + } + } + if (!BE (pstr->mbs_allocated, 0)) + pstr->mbs += offset; + } + pstr->raw_mbs_idx = idx; + pstr->len -= offset; + pstr->stop -= offset; + + /* Then build the buffers. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + if (pstr->icase) + { + reg_errcode_t ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else + build_wcs_buffer (pstr); + } + else +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + { + if (pstr->icase) + build_upper_buffer (pstr); + else if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + else + pstr->valid_len = pstr->len; + + pstr->cur_idx = 0; + return REG_NOERROR; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_peek_byte_case (const re_string_t *pstr, int idx) +{ + int ch, off; + + /* Handle the common (easiest) cases first. */ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_peek_byte (pstr, idx); + +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1 + && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) + return re_string_peek_byte (pstr, idx); +#endif + + off = pstr->cur_idx + idx; +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + off = pstr->offsets[off]; +#endif + + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + +#ifdef RE_ENABLE_I18N + /* Ensure that e.g. 
for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I + this function returns CAPITAL LETTER I instead of first byte of + DOTLESS SMALL LETTER I. The latter would confuse the parser, + since peek_byte_case doesn't advance cur_idx in any way. */ + if (pstr->offsets_needed && !isascii (ch)) + return re_string_peek_byte (pstr, idx); +#endif + + return ch; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_fetch_byte_case (re_string_t *pstr) +{ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_fetch_byte (pstr); + +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + { + int off, ch; + + /* For tr_TR.UTF-8 [[:islower:]] there is + [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip + in that case the whole multi-byte character and return + the original letter. On the other side, with + [[: DOTLESS SMALL LETTER I return [[:I, as doing + anything else would complicate things too much. */ + + if (!re_string_first_byte (pstr, pstr->cur_idx)) + return re_string_fetch_byte (pstr); + + off = pstr->offsets[pstr->cur_idx]; + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + + if (! isascii (ch)) + return re_string_fetch_byte (pstr); + + re_string_skip_bytes (pstr, + re_string_char_size_at (pstr, pstr->cur_idx)); + return ch; + } +#endif + + return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; +} + +static void +internal_function +re_string_destruct (re_string_t *pstr) +{ +#ifdef RE_ENABLE_I18N + re_free (pstr->wcs); + re_free (pstr->offsets); +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + re_free (pstr->mbs); +} + +/* Return the context at IDX in INPUT. */ + +static unsigned int +internal_function +re_string_context_at (const re_string_t *input, int idx, int eflags) +{ + int c; + if (BE (idx < 0, 0)) + /* In this case, we use the value stored in input->tip_context, + since we can't know the character in input->mbs[-1] here. */ + return input->tip_context; + if (BE (idx == input->len, 0)) + return ((eflags & REG_NOTEOL) ? 
CONTEXT_ENDBUF + : CONTEXT_NEWLINE | CONTEXT_ENDBUF); +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc; + int wc_idx = idx; + while(input->wcs[wc_idx] == WEOF) + { +#ifdef DEBUG + /* It must not happen. */ + assert (wc_idx >= 0); +#endif + --wc_idx; + if (wc_idx < 0) + return input->tip_context; + } + wc = input->wcs[wc_idx]; + if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) + return CONTEXT_WORD; + return (IS_WIDE_NEWLINE (wc) && input->newline_anchor + ? CONTEXT_NEWLINE : 0); + } + else +#endif + { + c = re_string_byte_at (input, idx); + if (bitset_contain (input->word_char, c)) + return CONTEXT_WORD; + return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; + } +} + +/* Functions for set operation. */ + +static reg_errcode_t +internal_function +re_node_set_alloc (re_node_set *set, int size) +{ + set->alloc = size; + set->nelem = 0; + set->elems = re_malloc (int, size); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_1 (re_node_set *set, int elem) +{ + set->alloc = 1; + set->nelem = 1; + set->elems = re_malloc (int, 1); + if (BE (set->elems == NULL, 0)) + { + set->alloc = set->nelem = 0; + return REG_ESPACE; + } + set->elems[0] = elem; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_2 (re_node_set *set, int elem1, int elem2) +{ + set->alloc = 2; + set->elems = re_malloc (int, 2); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + if (elem1 == elem2) + { + set->nelem = 1; + set->elems[0] = elem1; + } + else + { + set->nelem = 2; + if (elem1 < elem2) + { + set->elems[0] = elem1; + set->elems[1] = elem2; + } + else + { + set->elems[0] = elem2; + set->elems[1] = elem1; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_copy (re_node_set *dest, const re_node_set *src) +{ + dest->nelem = src->nelem; + if (src->nelem > 0) + { + dest->alloc = 
dest->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + { + dest->alloc = dest->nelem = 0; + return REG_ESPACE; + } + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + } + else + re_node_set_init_empty (dest); + return REG_NOERROR; +} + +/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. + Note: We assume dest->elems is NULL, when dest->alloc is 0. */ + +static reg_errcode_t +internal_function +re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, is, id, delta, sbase; + if (src1->nelem == 0 || src2->nelem == 0) + return REG_NOERROR; + + /* We need dest->nelem + 2 * elems_in_intersection; this is a + conservative estimate. */ + if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) + { + int new_alloc = src1->nelem + src2->nelem + dest->alloc; + int *new_elems = re_realloc (dest->elems, int, new_alloc); + if (BE (new_elems == NULL, 0)) + return REG_ESPACE; + dest->elems = new_elems; + dest->alloc = new_alloc; + } + + /* Find the items in the intersection of SRC1 and SRC2, and copy + into the top of DEST those that are not already in DEST itself. */ + sbase = dest->nelem + src1->nelem + src2->nelem; + i1 = src1->nelem - 1; + i2 = src2->nelem - 1; + id = dest->nelem - 1; + for (;;) + { + if (src1->elems[i1] == src2->elems[i2]) + { + /* Try to find the item in DEST. Maybe we could binary search? */ + while (id >= 0 && dest->elems[id] > src1->elems[i1]) + --id; + + if (id < 0 || dest->elems[id] != src1->elems[i1]) + dest->elems[--sbase] = src1->elems[i1]; + + if (--i1 < 0 || --i2 < 0) + break; + } + + /* Lower the highest of the two items. 
*/ + else if (src1->elems[i1] < src2->elems[i2]) + { + if (--i2 < 0) + break; + } + else + { + if (--i1 < 0) + break; + } + } + + id = dest->nelem - 1; + is = dest->nelem + src1->nelem + src2->nelem - 1; + delta = is - sbase + 1; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place; this is more or + less the same loop that is in re_node_set_merge. */ + dest->nelem += delta; + if (delta > 0 && id >= 0) + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + break; + } + } + + /* Copy remaining SRC elements. */ + memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int)); + + return REG_NOERROR; +} + +/* Calculate the union set of the sets SRC1 and SRC2. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/
+
+/* Notes on re_node_set_init_union: a NULL or empty input counts as the
+   empty set.  When only one input has elements DEST becomes a copy of it
+   (through re_node_set_init_copy); when neither has, DEST is initialized
+   empty.  Otherwise the two arrays are merged front to back and equal
+   elements are stored once, which presumes that each input is sorted in
+   ascending order.  If the allocation fails REG_ESPACE is returned and
+   DEST is left as an empty set (alloc == nelem == 0, elems == NULL), the
+   same state re_node_set_init_1 and re_node_set_init_copy leave behind.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+                        const re_node_set *src2)
+{
+  int i1, i2, id;
+  int have1 = (src1 != NULL && src1->nelem > 0);
+  int have2 = (src2 != NULL && src2->nelem > 0);
+
+  /* Degenerate cases: at most one input contributes elements.  */
+  if (!have1 || !have2)
+    {
+      if (have1)
+        return re_node_set_init_copy (dest, src1);
+      if (have2)
+        return re_node_set_init_copy (dest, src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  /* The union never holds more than both sizes added together.  */
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    {
+      /* Do not advertise capacity that does not exist.  */
+      dest->alloc = dest->nelem = 0;
+      return REG_ESPACE;
+    }
+
+  /* Two-way merge; an element present in both inputs is emitted once.  */
+  for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem;)
+    {
+      if (src1->elems[i1] > src2->elems[i2])
+        {
+          dest->elems[id++] = src2->elems[i2++];
+          continue;
+        }
+      if (src1->elems[i1] == src2->elems[i2])
+        ++i2;
+      dest->elems[id++] = src1->elems[i1++];
+    }
+
+  /* At most one of the inputs still has a tail; append it unchanged.  */
+  if (i1 < src1->nelem)
+    {
+      memcpy (dest->elems + id, src1->elems + i1,
+              (src1->nelem - i1) * sizeof (int));
+      id += src1->nelem - i1;
+    }
+  else if (i2 < src2->nelem)
+    {
+      memcpy (dest->elems + id, src2->elems + i2,
+              (src2->nelem - i2) * sizeof (int));
+      id += src2->nelem - i2;
+    }
+  dest->nelem = id;
+  return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets DEST and SRC. And store it to
+   DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/ + +static reg_errcode_t +internal_function +re_node_set_merge (re_node_set *dest, const re_node_set *src) +{ + int is, id, sbase, delta; + if (src == NULL || src->nelem == 0) + return REG_NOERROR; + if (dest->alloc < 2 * src->nelem + dest->nelem) + { + int new_alloc = 2 * (src->nelem + dest->alloc); + int *new_buffer = re_realloc (dest->elems, int, new_alloc); + if (BE (new_buffer == NULL, 0)) + return REG_ESPACE; + dest->elems = new_buffer; + dest->alloc = new_alloc; + } + + if (BE (dest->nelem == 0, 0)) + { + dest->nelem = src->nelem; + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + return REG_NOERROR; + } + + /* Copy into the top of DEST the items of SRC that are not + found in DEST. Maybe we could binary search in DEST? */ + for (sbase = dest->nelem + 2 * src->nelem, + is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; ) + { + if (dest->elems[id] == src->elems[is]) + is--, id--; + else if (dest->elems[id] < src->elems[is]) + dest->elems[--sbase] = src->elems[is--]; + else /* if (dest->elems[id] > src->elems[is]) */ + --id; + } + + if (is >= 0) + { + /* If DEST is exhausted, the remaining items of SRC must be unique. */ + sbase -= is + 1; + memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int)); + } + + id = dest->nelem - 1; + is = dest->nelem + 2 * src->nelem - 1; + delta = is - sbase + 1; + if (delta == 0) + return REG_NOERROR; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place. */ + dest->nelem += delta; + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + { + /* Copy remaining SRC elements. 
*/ + memcpy (dest->elems, dest->elems + sbase, + delta * sizeof (int)); + break; + } + } + } + + return REG_NOERROR; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have ELEM. + return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert (re_node_set *set, int elem) +{ + int idx; + /* In case the set is empty. */ + if (set->alloc == 0) + { + if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) + return 1; + else + return -1; + } + + if (BE (set->nelem, 0) == 0) + { + /* We already guaranteed above that set->alloc != 0. */ + set->elems[0] = elem; + ++set->nelem; + return 1; + } + + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = set->alloc * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Move the elements which follows the new element. Test the + first element separately to skip a check in the inner loop. */ + if (elem < set->elems[0]) + { + idx = 0; + for (idx = set->nelem; idx > 0; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + else + { + for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + + /* Insert the new element. */ + set->elems[idx] = elem; + ++set->nelem; + return 1; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have any element greater than or equal to ELEM. + Return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert_last (re_node_set *set, int elem) +{ + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = (set->alloc + 1) * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Insert the new element. 
*/ + set->elems[set->nelem++] = elem; + return 1; +} + +/* Compare two node sets SET1 and SET2. + return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_compare (const re_node_set *set1, const re_node_set *set2) +{ + int i; + if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) + return 0; + for (i = set1->nelem ; --i >= 0 ; ) + if (set1->elems[i] != set2->elems[i]) + return 0; + return 1; +} + +/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_contains (const re_node_set *set, int elem) +{ + unsigned int idx, right, mid; + if (set->nelem <= 0) + return 0; + + /* Binary search the element. */ + idx = 0; + right = set->nelem - 1; + while (idx < right) + { + mid = (idx + right) / 2; + if (set->elems[mid] < elem) + idx = mid + 1; + else + right = mid; + } + return set->elems[idx] == elem ? idx + 1 : 0; +} + +static void +internal_function +re_node_set_remove_at (re_node_set *set, int idx) +{ + if (idx < 0 || idx >= set->nelem) + return; + --set->nelem; + for (; idx < set->nelem; idx++) + set->elems[idx] = set->elems[idx + 1]; +} + + +/* Add the token TOKEN to dfa->nodes, and return the index of the token. + Or return -1, if an error will be occured. */ + +static int +internal_function +re_dfa_add_node (re_dfa_t *dfa, re_token_t token) +{ + int type = token.type; + if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) + { + size_t new_nodes_alloc = dfa->nodes_alloc * 2; + int *new_nexts, *new_indices; + re_node_set *new_edests, *new_eclosures; + re_token_t *new_nodes; + + /* Avoid overflows. 
*/ + if (BE (new_nodes_alloc < dfa->nodes_alloc, 0)) + return -1; + + new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); + if (BE (new_nodes == NULL, 0)) + return -1; + dfa->nodes = new_nodes; + new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); + new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); + new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); + new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); + if (BE (new_nexts == NULL || new_indices == NULL + || new_edests == NULL || new_eclosures == NULL, 0)) + return -1; + dfa->nexts = new_nexts; + dfa->org_indices = new_indices; + dfa->edests = new_edests; + dfa->eclosures = new_eclosures; + dfa->nodes_alloc = new_nodes_alloc; + } + dfa->nodes[dfa->nodes_len] = token; + dfa->nodes[dfa->nodes_len].constraint = 0; +#ifdef RE_ENABLE_I18N + dfa->nodes[dfa->nodes_len].accept_mb = + (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; +#endif + dfa->nexts[dfa->nodes_len] = -1; + re_node_set_init_empty (dfa->edests + dfa->nodes_len); + re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); + return dfa->nodes_len++; +} + +static inline unsigned int +internal_function +calc_state_hash (const re_node_set *nodes, unsigned int context) +{ + unsigned int hash = nodes->nelem + context; + int i; + for (i = 0 ; i < nodes->nelem ; i++) + hash += nodes->elems[i]; + return hash; +} + +/* Search for the state whose node_set is equivalent to NODES. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. 
*/
+
+/* re_acquire_state hashes NODES with a context of 0, scans the matching
+   bucket of the state table for a state with the same hash and the same
+   node set, and falls back to create_ci_newstate when none is found.
+   ERR is written only for an empty NODES (REG_NOERROR) and when the
+   creation fails (REG_ESPACE).  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+                  const re_node_set *nodes)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *result;
+  unsigned int key;
+  int slot;
+
+  /* An empty node set maps to NULL without being an error.  */
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, 0);
+  bucket = dfa->state_table + (key & dfa->state_hash_mask);
+
+  /* Reuse a registered state when one with an equal node set exists;
+     the cheap hash comparison filters before the full set comparison.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash == key
+          && re_node_set_compare (&candidate->nodes, nodes))
+        return candidate;
+    }
+
+  /* Nothing suitable in the DFA yet: build a fresh state.  */
+  result = create_ci_newstate (dfa, nodes, key);
+  if (BE (result == NULL, 0))
+    *err = REG_ESPACE;
+  return result;
+}
+
+/* Search for the state whose node_set is equivalent to NODES and
+   whose context is equivalent to CONTEXT.
+   Return the pointer to the state, if we found it in the DFA.
+   Otherwise create the new one and return it.  In case of an error
+   return NULL and set the error code in ERR.
+   Note: - We assume NULL as the invalid state, then it is possible that
+           return value is NULL and ERR is REG_NOERROR.
+         - We never return non-NULL value in case of any errors, it is for
+           optimization. 
*/
+
+/* re_acquire_state_context mixes CONTEXT into the hash, and a registered
+   state is reused only if its hash, its context and its entrance node set
+   all agree with the request.  Otherwise create_cd_newstate builds a new
+   one.  ERR is written only for an empty NODES (REG_NOERROR) and when the
+   creation fails (REG_ESPACE).  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+                          const re_node_set *nodes, unsigned int context)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *result;
+  unsigned int key;
+  int slot;
+
+  /* An empty node set maps to NULL without being an error.  */
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, context);
+  bucket = dfa->state_table + (key & dfa->state_hash_mask);
+
+  /* Cheapest tests first; the set comparison runs only for candidates
+     whose hash and context already match.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash != key)
+        continue;
+      if (candidate->context != context)
+        continue;
+      if (re_node_set_compare (candidate->entrance_nodes, nodes))
+        return candidate;
+    }
+
+  /* Nothing suitable in `dfa' yet: build a fresh state.  */
+  result = create_cd_newstate (dfa, nodes, context, key);
+  if (BE (result == NULL, 0))
+    *err = REG_ESPACE;
+  return result;
+}
+
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+   HASH put in the appropriate bucket of DFA's state table.  Return value
+   indicates the error code if failed. 
*/
+
+/* register_state stores HASH in NEWSTATE, fills NEWSTATE->non_eps_nodes
+   with those members of NEWSTATE->nodes that are not epsilon nodes, and
+   appends NEWSTATE to its hash bucket, growing the bucket when it is
+   full.  Every failure is reported as REG_ESPACE; NEWSTATE is not freed
+   here, that is left to the caller.  */
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+                unsigned int hash)
+{
+  struct re_state_table_entry *spot;
+  reg_errcode_t err;
+  int i;
+
+  newstate->hash = hash;
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (i = 0; i < newstate->nodes.nelem; i++)
+    {
+      int elem = newstate->nodes.elems[i];
+      if (IS_EPSILON_NODE (dfa->nodes[elem].type))
+        continue;
+      /* re_node_set_insert_last reports a failed resize with -1; do not
+         let that pass silently.  */
+      if (BE (re_node_set_insert_last (&newstate->non_eps_nodes, elem) == -1,
+              0))
+        return REG_ESPACE;
+    }
+
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+  if (BE (spot->alloc <= spot->num, 0))
+    {
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+                                              new_alloc);
+      if (BE (new_array == NULL, 0))
+        return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Release STATE together with everything it owns: its node sets, its
+   transition tables and, when it is a separate object rather than an
+   alias of STATE->nodes, its entrance node set.  */
+
+static void
+free_state (re_dfastate_t *state)
+{
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  /* entrance_nodes can be NULL when a state is torn down right after the
+     allocation of that set failed; re_node_set_free is handed the pointer
+     as is, so skip it in that case.  */
+  if (state->entrance_nodes != NULL
+      && state->entrance_nodes != &state->nodes)
+    {
+      re_node_set_free (state->entrance_nodes);
+      re_free (state->entrance_nodes);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create the new state which is independent of contexts.
+   Return the new state if succeeded, otherwise return NULL. 
*/ + +static re_dfastate_t * +internal_function +create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int hash) +{ + int i; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->entrance_nodes = &newstate->nodes; + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + if (type == CHARACTER && !node->constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + else if (type == ANCHOR || node->constraint) + newstate->has_constraint = 1; + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} + +/* Create the new state which is depend on the context CONTEXT. + Return the new state if succeeded, otherwise return NULL. 
*/
+
+/* create_cd_newstate allocates a zeroed state and copies NODES into it.
+   As soon as a node carrying a constraint (or an anchor) is met, the
+   unfiltered NODES are preserved in a separately allocated entrance set;
+   nodes whose constraint is not satisfied by CONTEXT are then removed
+   from the state's own node set.  The finished state is registered under
+   HASH.  Every allocation failure releases the partially built state and
+   yields NULL.  */
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int context, unsigned int hash)
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (1, sizeof (re_dfastate_t));
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      unsigned int constraint = 0;
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      if (node->constraint)
+        constraint = node->constraint;
+
+      if (type == CHARACTER && !constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+        newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+      else if (type == ANCHOR)
+        constraint = node->opr.ctx_type;
+
+      if (constraint)
+        {
+          if (newstate->entrance_nodes == &newstate->nodes)
+            {
+              /* Allocate into a temporary first: while the allocation is
+                 unconfirmed, entrance_nodes must keep aliasing
+                 &newstate->nodes so that free_state never sees NULL.  */
+              re_node_set *entrance_nodes = re_malloc (re_node_set, 1);
+              if (BE (entrance_nodes == NULL, 0))
+                {
+                  free_state (newstate);
+                  return NULL;
+                }
+              newstate->entrance_nodes = entrance_nodes;
+              nctx_nodes = 0;
+              newstate->has_constraint = 1;
+              /* A failed copy would leave an empty entrance set behind
+                 and the state could never be found again by its nodes;
+                 treat it as the allocation failure it is.  */
+              if (BE (re_node_set_init_copy (newstate->entrance_nodes, nodes)
+                      != REG_NOERROR, 0))
+                {
+                  free_state (newstate);
+                  return NULL;
+                }
+            }
+
+          if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+            {
+              /* I indexes NODES; NCTX_NODES earlier removals have shifted
+                 the corresponding slot of newstate->nodes down.  */
+              re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+              ++nctx_nodes;
+            }
+        }
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return newstate;
+}
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regcomp.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa .
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* Forward declarations for the file-local functions of the pattern
   compiler.  */

/* Compilation driver, fastmap construction and DFA set-up/tear-down.  */
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
                                          size_t length, reg_syntax_t syntax);
static void re_compile_fastmap_iter (regex_t *bufp,
                                     const re_dfastate_t *init_state,
                                     char *fastmap);
static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
#ifdef RE_ENABLE_I18N
static void free_charset (re_charset_t *cset);
#endif /* RE_ENABLE_I18N */
static void free_workarea_compile (regex_t *preg);
static reg_errcode_t create_initial_state (re_dfa_t *dfa);
#ifdef RE_ENABLE_I18N
static void optimize_utf8 (re_dfa_t *dfa);
#endif

/* Parse-tree analysis passes and the tree walkers that drive them.  */
static reg_errcode_t analyze (regex_t *preg);
static reg_errcode_t preorder (bin_tree_t *root,
                               reg_errcode_t (fn (void *, bin_tree_t *)),
                               void *extra);
static reg_errcode_t postorder (bin_tree_t *root,
                                reg_errcode_t (fn (void *, bin_tree_t *)),
                                void *extra);
static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
                                 bin_tree_t *node);
static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
                                   unsigned int constraint);
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
                                         int node, int root);
static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);

/* Tokenizer and recursive-descent parser.  */
static int fetch_number (re_string_t *input, re_token_t *token,
                         reg_syntax_t syntax);
static int peek_token (re_token_t *token, re_string_t *input,
                        reg_syntax_t syntax) internal_function;
static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
                          reg_syntax_t syntax, reg_errcode_t *err);
static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
                                  re_token_t *token, reg_syntax_t syntax,
                                  int nest, reg_errcode_t *err);
static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
                                 re_token_t *token, reg_syntax_t syntax,
                                 int nest, reg_errcode_t *err);
static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
                                     re_token_t *token, reg_syntax_t syntax,
                                     int nest, reg_errcode_t *err);
static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
                                  re_token_t *token, reg_syntax_t syntax,
                                  int nest, reg_errcode_t *err);
static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
                                 re_dfa_t *dfa, re_token_t *token,
                                 reg_syntax_t syntax, reg_errcode_t *err);
static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
                                      re_token_t *token, reg_syntax_t syntax,
                                      reg_errcode_t *err);
static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
                                            re_string_t *regexp,
                                            re_token_t *token, int token_len,
                                            re_dfa_t *dfa,
                                            reg_syntax_t syntax,
                                            int accept_hyphen);
static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
                                          re_string_t *regexp,
                                          re_token_t *token);

/* Bracket-expression helpers; the multibyte build takes the extra
   re_charset_t and allocation-counter arguments.  */
#ifdef RE_ENABLE_I18N
static reg_errcode_t build_equiv_class (bitset_t sbcset,
                                        re_charset_t *mbcset,
                                        int *equiv_class_alloc,
                                        const unsigned char *name);
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
                                      bitset_t sbcset,
                                      re_charset_t *mbcset,
                                      int *char_class_alloc,
                                      const unsigned char *class_name,
                                      reg_syntax_t syntax);
#else  /* not RE_ENABLE_I18N */
static reg_errcode_t build_equiv_class (bitset_t sbcset,
                                        const unsigned char *name);
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
                                      bitset_t sbcset,
                                      const unsigned char *class_name,
                                      reg_syntax_t syntax);
#endif /* not RE_ENABLE_I18N */
static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
                                       RE_TRANSLATE_TYPE trans,
                                       const unsigned char *class_name,
                                       const unsigned char *extra,
                                       int non_match, reg_errcode_t *err);

/* Parse-tree node construction, duplication and release.  */
static bin_tree_t *create_tree (re_dfa_t *dfa,
                                bin_tree_t *left, bin_tree_t *right,
                                re_token_type_t type);
static bin_tree_t *create_token_tree (re_dfa_t *dfa,
                                      bin_tree_t *left, bin_tree_t *right,
                                      const re_token_t *token);
static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
static void free_token (re_token_t *node);
static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);

/* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be same as there.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?
*/ + +const char __re_error_msgid[] attribute_hidden = + { +#define REG_NOERROR_IDX 0 + gettext_noop ("Success") /* REG_NOERROR */ + "\0" +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") + gettext_noop ("No match") /* REG_NOMATCH */ + "\0" +#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") + gettext_noop ("Invalid regular expression") /* REG_BADPAT */ + "\0" +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") + gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ + "\0" +#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") + gettext_noop ("Invalid character class name") /* REG_ECTYPE */ + "\0" +#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") + gettext_noop ("Trailing backslash") /* REG_EESCAPE */ + "\0" +#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") + gettext_noop ("Invalid back reference") /* REG_ESUBREG */ + "\0" +#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") + gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + "\0" +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") + gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ + "\0" +#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") + gettext_noop ("Unmatched \\{") /* REG_EBRACE */ + "\0" +#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") + gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ + "\0" +#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") + gettext_noop ("Invalid range end") /* REG_ERANGE */ + "\0" +#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") + gettext_noop ("Memory exhausted") /* REG_ESPACE */ + "\0" +#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") + gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ + "\0" +#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular 
expression") + gettext_noop ("Premature end of regular expression") /* REG_EEND */ + "\0" +#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") + gettext_noop ("Regular expression too big") /* REG_ESIZE */ + "\0" +#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") + gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ + }; + +const size_t __re_error_msgid_idx[] attribute_hidden = + { + REG_NOERROR_IDX, + REG_NOMATCH_IDX, + REG_BADPAT_IDX, + REG_ECOLLATE_IDX, + REG_ECTYPE_IDX, + REG_EESCAPE_IDX, + REG_ESUBREG_IDX, + REG_EBRACK_IDX, + REG_EPAREN_IDX, + REG_EBRACE_IDX, + REG_BADBR_IDX, + REG_ERANGE_IDX, + REG_ESPACE_IDX, + REG_BADRPT_IDX, + REG_EEND_IDX, + REG_ESIZE_IDX, + REG_ERPAREN_IDX + }; + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length LENGTH) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. */ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + size_t length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub, unless RE_NO_SUB is set. */ + bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = re_compile_internal (bufp, pattern, length, re_syntax_options); + + if (!ret) + return NULL; + return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. 
Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. */ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} +#ifdef _LIBC +weak_alias (__re_set_syntax, re_set_syntax) +#endif + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + char *fastmap = bufp->fastmap; + + memset (fastmap, '\0', sizeof (char) * SBC_MAX); + re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); + if (dfa->init_state != dfa->init_state_word) + re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); + if (dfa->init_state != dfa->init_state_nl) + re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); + if (dfa->init_state != dfa->init_state_begbuf) + re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); + bufp->fastmap_accurate = 1; + return 0; +} +#ifdef _LIBC +weak_alias (__re_compile_fastmap, re_compile_fastmap) +#endif + +static inline void +__attribute ((always_inline)) +re_set_fastmap (char *fastmap, int icase, int ch) +{ + fastmap[ch] = 1; + if (icase) + fastmap[tolower (ch)] = 1; +} + +/* Helper function for re_compile_fastmap. + Compile fastmap for the initial_state INIT_STATE. 
*/ + +static void +re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, + char *fastmap) +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + int node_cnt; + int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); + for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) + { + int node = init_state->nodes.elems[node_cnt]; + re_token_type_t type = dfa->nodes[node].type; + + if (type == CHARACTER) + { + re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); +#ifdef RE_ENABLE_I18N + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + unsigned char *buf = alloca (dfa->mb_cur_max), *p; + wchar_t wc; + mbstate_t state; + + p = buf; + *p++ = dfa->nodes[node].opr.c; + while (++node < dfa->nodes_len + && dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].mb_partial) + *p++ = dfa->nodes[node].opr.c; + memset (&state, '\0', sizeof (state)); + if (mbrtowc (&wc, (const char *) buf, p - buf, + &state) == p - buf + && (__wcrtomb ((char *) buf, towlower (wc), &state) + != (size_t) -1)) + re_set_fastmap (fastmap, 0, buf[0]); + } +#endif + } + else if (type == SIMPLE_BRACKET) + { + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } + } +#ifdef RE_ENABLE_I18N + else if (type == COMPLEX_BRACKET) + { + int i; + re_charset_t *cset = dfa->nodes[node].opr.mbcset; + if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes + || cset->nranges || cset->nchar_classes) + { +# ifdef _LIBC + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) + { + /* In this case we want to catch the bytes which are + the first byte of any collation elements. + e.g. In da_DK, we want to catch 'a' since "aa" + is a valid collation element, and don't catch + 'b' since 'b' is the only collation element + which starts from 'b'. 
*/ + const int32_t *table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); + } +# else + if (dfa->mb_cur_max > 1) + for (i = 0; i < SBC_MAX; ++i) + if (__btowc (i) == WEOF) + re_set_fastmap (fastmap, icase, i); +# endif /* not _LIBC */ + } + for (i = 0; i < cset->nmbchars; ++i) + { + char buf[256]; + mbstate_t state; + memset (&state, '\0', sizeof (state)); + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) + != (size_t) -1) + re_set_fastmap (fastmap, 0, *(unsigned char *) buf); + } + } + } +#endif /* RE_ENABLE_I18N */ + else if (type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == OP_UTF8_PERIOD +#endif /* RE_ENABLE_I18N */ + || type == END_OF_RE) + { + memset (fastmap, '\1', sizeof (char) * SBC_MAX); + if (type == END_OF_RE) + bufp->can_be_null = 1; + return; + } + } +} + +/* Entry point for POSIX code. */ +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. 
+ Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (preg, pattern, cflags) + regex_t *__restrict preg; + const char *__restrict pattern; + int cflags; +{ + reg_errcode_t ret; + reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + preg->buffer = NULL; + preg->allocated = 0; + preg->used = 0; + + /* Try to allocate space for the fastmap. */ + preg->fastmap = re_malloc (char, SBC_MAX); + if (BE (preg->fastmap == NULL, 0)) + return REG_ESPACE; + + syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. */ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + preg->no_sub = !!(cflags & REG_NOSUB); + preg->translate = NULL; + + ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) + ret = REG_EPAREN; + + /* We have already checked preg->fastmap != NULL. */ + if (BE (ret == REG_NOERROR, 1)) + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. This function never fails in this implementation. */ + (void) re_compile_fastmap (preg); + else + { + /* Some error occurred while compiling the expression. 
*/ + re_free (preg->fastmap); + preg->fastmap = NULL; + } + + return (int) ret; +} +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +/* regerror ( int errcode, preg, errbuf, errbuf_size) */ +size_t +regerror ( + int errcode, + const regex_t *__restrict preg, + char *__restrict errbuf, + size_t errbuf_size) +{ + const char *msg; + size_t msg_size; + + if (BE (errcode < 0 + || errcode >= (int) (sizeof (__re_error_msgid_idx) + / sizeof (__re_error_msgid_idx[0])), 0)) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. */ + + if (BE (errbuf_size != 0, 1)) + { + if (BE (msg_size > errbuf_size, 0)) + { +#if defined HAVE_MEMPCPY || defined _LIBC + *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; +#else + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; +#endif + } + else + memcpy (errbuf, msg, msg_size); + } + + return msg_size; +} +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif + + +#ifdef RE_ENABLE_I18N +/* This static array is used for the map to single-byte characters when + UTF-8 is used. Otherwise we would allocate memory just to initialize + it the same all the time. UTF-8 is the preferred encoding so this is + a worthwhile optimization. */ +static const bitset_t utf8_sb_map = +{ + /* Set the first 128 bits. */ + [0 ... 
0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX +}; +#endif + + +static void +free_dfa_content (re_dfa_t *dfa) +{ + int i, j; + + if (dfa->nodes) + for (i = 0; i < dfa->nodes_len; ++i) + free_token (dfa->nodes + i); + re_free (dfa->nexts); + for (i = 0; i < dfa->nodes_len; ++i) + { + if (dfa->eclosures != NULL) + re_node_set_free (dfa->eclosures + i); + if (dfa->inveclosures != NULL) + re_node_set_free (dfa->inveclosures + i); + if (dfa->edests != NULL) + re_node_set_free (dfa->edests + i); + } + re_free (dfa->edests); + re_free (dfa->eclosures); + re_free (dfa->inveclosures); + re_free (dfa->nodes); + + if (dfa->state_table) + for (i = 0; i <= dfa->state_hash_mask; ++i) + { + struct re_state_table_entry *entry = dfa->state_table + i; + for (j = 0; j < entry->num; ++j) + { + re_dfastate_t *state = entry->array[j]; + free_state (state); + } + re_free (entry->array); + } + re_free (dfa->state_table); +#ifdef RE_ENABLE_I18N + if (dfa->sb_char != utf8_sb_map) + re_free (dfa->sb_char); +#endif + re_free (dfa->subexp_map); +#ifdef DEBUG + re_free (dfa->re_str); +#endif + + re_free (dfa); +} + + +/* Free dynamically allocated space used by PREG. */ + +void +regfree (preg) + regex_t *preg; +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + if (BE (dfa != NULL, 1)) + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + + re_free (preg->fastmap); + preg->fastmap = NULL; + + re_free (preg->translate); + preg->translate = NULL; +} +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +# ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec above without link errors. 
*/ +weak_function +# endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + char *fastmap; + + if (!s) + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (re_comp_buf.buffer) + { + fastmap = re_comp_buf.fastmap; + re_comp_buf.fastmap = NULL; + __regfree (&re_comp_buf); + memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); + re_comp_buf.fastmap = fastmap; + } + + if (re_comp_buf.fastmap == NULL) + { + re_comp_buf.fastmap = (char *) malloc (SBC_MAX); + if (re_comp_buf.fastmap == NULL) + return (char *) gettext (__re_error_msgid + + __re_error_msgid_idx[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ + return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} + +#ifdef _LIBC +libc_freeres_fn (free_mem) +{ + __regfree (&re_comp_buf); +} +#endif + +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. + Compile the regular expression PATTERN, whose length is LENGTH. + SYNTAX indicate regular expression's syntax. */ + +static reg_errcode_t +re_compile_internal (regex_t *preg, const char * pattern, size_t length, + reg_syntax_t syntax) +{ + reg_errcode_t err = REG_NOERROR; + re_dfa_t *dfa; + re_string_t regexp; + + /* Initialize the pattern buffer. */ + preg->fastmap_accurate = 0; + preg->syntax = syntax; + preg->not_bol = preg->not_eol = 0; + preg->used = 0; + preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; + + /* Initialize the dfa. 
*/ + dfa = (re_dfa_t *) preg->buffer; + if (BE (preg->allocated < sizeof (re_dfa_t), 0)) + { + /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. If ->buffer is NULL this + is a simple allocation. */ + dfa = re_realloc (preg->buffer, re_dfa_t, 1); + if (dfa == NULL) + return REG_ESPACE; + preg->allocated = sizeof (re_dfa_t); + preg->buffer = (unsigned char *) dfa; + } + preg->used = sizeof (re_dfa_t); + + err = init_dfa (dfa, length); + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } +#ifdef DEBUG + /* Note: length+1 will not overflow since it is checked in init_dfa. */ + dfa->re_str = re_malloc (char, length + 1); + strncpy (dfa->re_str, pattern, length + 1); +#endif + + __libc_lock_init (dfa->lock); + + err = re_string_construct (®exp, pattern, length, preg->translate, + syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + { + re_compile_internal_free_return: + free_workarea_compile (preg); + re_string_destruct (®exp); + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } + + /* Parse the regular expression, and build a structure tree. */ + preg->re_nsub = 0; + dfa->str_tree = parse (®exp, preg, syntax, &err); + if (BE (dfa->str_tree == NULL, 0)) + goto re_compile_internal_free_return; + + /* Analyze the tree and create the nfa. */ + err = analyze (preg); + if (BE (err != REG_NOERROR, 0)) + goto re_compile_internal_free_return; + +#ifdef RE_ENABLE_I18N + /* If possible, do searching in single byte encoding to speed things up. */ + if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) + optimize_utf8 (dfa); +#endif + + /* Then create the initial state of the dfa. */ + err = create_initial_state (dfa); + + /* Release work areas. 
*/ + free_workarea_compile (preg); + re_string_destruct (®exp); + + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + } + + return err; +} + +/* Initialize DFA. We use the length of the regular expression PAT_LEN + as the initial length of some arrays. */ + +static reg_errcode_t +init_dfa (re_dfa_t *dfa, size_t pat_len) +{ + unsigned int table_size; +#ifndef _LIBC + char *codeset_name; +#endif + + memset (dfa, '\0', sizeof (re_dfa_t)); + + /* Force allocation of str_tree_storage the first time. */ + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + + /* Avoid overflows. */ + if (pat_len == SIZE_MAX) + return REG_ESPACE; + + dfa->nodes_alloc = pat_len + 1; + dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); + + /* table_size = 2 ^ ceil(log pat_len) */ + for (table_size = 1; ; table_size <<= 1) + if (table_size > pat_len) + break; + + dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); + dfa->state_hash_mask = table_size - 1; + + dfa->mb_cur_max = MB_CUR_MAX; +#ifdef _LIBC + if (dfa->mb_cur_max == 6 + && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) + dfa->is_utf8 = 1; + dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) + != 0); +#else +# ifdef HAVE_LANGINFO_CODESET + codeset_name = nl_langinfo (CODESET); +# else + codeset_name = getenv ("LC_ALL"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LC_CTYPE"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LANG"); + if (codeset_name == NULL) + codeset_name = ""; + else if (strchr (codeset_name, '.') != NULL) + codeset_name = strchr (codeset_name, '.') + 1; +# endif + + if (strcasecmp (codeset_name, "UTF-8") == 0 + || strcasecmp (codeset_name, "UTF8") == 0) + dfa->is_utf8 = 1; + + /* We check exhaustively in the loop below if this charset is a + superset of ASCII. 
*/ + dfa->map_notascii = 0; +#endif + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + if (dfa->is_utf8) + dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; + else + { + int i, j, ch; + + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); + if (BE (dfa->sb_char == NULL, 0)) + return REG_ESPACE; + + /* Set the bits corresponding to single byte chars. */ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + { + wint_t wch = __btowc (ch); + if (wch != WEOF) + dfa->sb_char[i] |= (bitset_word_t) 1 << j; +# ifndef _LIBC + if (isascii (ch) && wch != ch) + dfa->map_notascii = 1; +# endif + } + } + } +#endif + + if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +/* Initialize WORD_CHAR table, which indicate which character is + "word". In this case "word" means that it is the word construction + character used by some operators like "\<", "\>", etc. */ + +static void +internal_function +init_word_char (re_dfa_t *dfa) +{ + int i, j, ch; + dfa->word_ops_used = 1; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (isalnum (ch) || ch == '_') + dfa->word_char[i] |= (bitset_word_t) 1 << j; +} + +/* Free the work area which are only used while compiling. */ + +static void +free_workarea_compile (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_storage_t *storage, *next; + for (storage = dfa->str_tree_storage; storage; storage = next) + { + next = storage->next; + re_free (storage); + } + dfa->str_tree_storage = NULL; + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + dfa->str_tree = NULL; + re_free (dfa->org_indices); + dfa->org_indices = NULL; +} + +/* Create initial states for all contexts. 
*/ + +static reg_errcode_t +create_initial_state (re_dfa_t *dfa) +{ + int first, i; + reg_errcode_t err; + re_node_set init_nodes; + + /* Initial states have the epsilon closure of the node which is + the first node of the regular expression. */ + first = dfa->str_tree->first->node_idx; + dfa->init_node = first; + err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* The back-references which are in initial states can epsilon transit, + since in this case all of the subexpressions can be null. + Then we add epsilon closures of the nodes which are the next nodes of + the back-references. */ + if (dfa->nbackref > 0) + for (i = 0; i < init_nodes.nelem; ++i) + { + int node_idx = init_nodes.elems[i]; + re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + if (type != OP_BACK_REF) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + + if (type == OP_BACK_REF) + { + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) + { + re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); + i = 0; + } + } + } + + /* It must be the first time to invoke acquire_state. */ + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. 
*/ + if (BE (dfa->init_state == NULL, 0)) + return err; + if (dfa->init_state->has_constraint) + { + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_WORD); + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_NEWLINE); + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, + CONTEXT_NEWLINE + | CONTEXT_BEGBUF); + if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return err; + } + else + dfa->init_state_word = dfa->init_state_nl + = dfa->init_state_begbuf = dfa->init_state; + + re_node_set_free (&init_nodes); + return REG_NOERROR; +} + +#ifdef RE_ENABLE_I18N +/* If it is possible to do searching in single byte encoding instead of UTF-8 + to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change + DFA nodes where needed. */ + +static void +optimize_utf8 (re_dfa_t *dfa) +{ + int node, i, mb_chars = 0, has_period = 0; + + for (node = 0; node < dfa->nodes_len; ++node) + switch (dfa->nodes[node].type) + { + case CHARACTER: + if (dfa->nodes[node].opr.c >= 0x80) + mb_chars = 1; + break; + case ANCHOR: + switch (dfa->nodes[node].opr.idx) + { + case LINE_FIRST: + case LINE_LAST: + case BUF_FIRST: + case BUF_LAST: + break; + default: + /* Word anchors etc. cannot be handled. */ + return; + } + break; + case OP_PERIOD: + has_period = 1; + break; + case OP_BACK_REF: + case OP_ALT: + case END_OF_RE: + case OP_DUP_ASTERISK: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + break; + case COMPLEX_BRACKET: + return; + case SIMPLE_BRACKET: + /* Just double check. The non-ASCII range starts at 0x80. 
*/ + assert (0x80 % BITSET_WORD_BITS == 0); + for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) + if (dfa->nodes[node].opr.sbcset[i]) + return; + break; + default: + abort (); + } + + if (mb_chars || has_period) + for (node = 0; node < dfa->nodes_len; ++node) + { + if (dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].opr.c >= 0x80) + dfa->nodes[node].mb_partial = 0; + else if (dfa->nodes[node].type == OP_PERIOD) + dfa->nodes[node].type = OP_UTF8_PERIOD; + } + + /* The search can be in single byte locale. */ + dfa->mb_cur_max = 1; + dfa->is_utf8 = 0; + dfa->has_mb_node = dfa->nbackref > 0 || has_period; +} +#endif + +/* Analyze the structure tree, and calculate "first", "next", "edest", + "eclosure", and "inveclosure". */ + +static reg_errcode_t +analyze (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + reg_errcode_t ret; + + /* Allocate arrays. */ + dfa->nexts = re_malloc (int, dfa->nodes_alloc); + dfa->org_indices = re_malloc (int, dfa->nodes_alloc); + dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); + dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); + if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL + || dfa->eclosures == NULL, 0)) + return REG_ESPACE; + + dfa->subexp_map = re_malloc (int, preg->re_nsub); + if (dfa->subexp_map != NULL) + { + int i; + for (i = 0; i < preg->re_nsub; i++) + dfa->subexp_map[i] = i; + preorder (dfa->str_tree, optimize_subexps, dfa); + for (i = 0; i < preg->re_nsub; i++) + if (dfa->subexp_map[i] != i) + break; + if (i == preg->re_nsub) + { + free (dfa->subexp_map); + dfa->subexp_map = NULL; + } + } + + ret = postorder (dfa->str_tree, lower_subexps, preg); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = postorder (dfa->str_tree, calc_first, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + preorder (dfa->str_tree, calc_next, dfa); + ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = 
calc_eclosure (dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + /* We only need this during the prune_impossible_nodes pass in regexec.c; + skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ + if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) + || dfa->nbackref) + { + dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); + if (BE (dfa->inveclosures == NULL, 0)) + return REG_ESPACE; + ret = calc_inveclosure (dfa); + } + + return ret; +} + +/* Our parse trees are very unbalanced, so we cannot use a stack to + implement parse tree visits. Instead, we use parent pointers and + some hairy code in these two functions. */ +static reg_errcode_t +postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node, *prev; + + for (node = root; ; ) + { + /* Descend down the tree, preferably to the left (or to the right + if that's the only child). */ + while (node->left || node->right) + if (node->left) + node = node->left; + else + node = node->right; + + do + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + if (node->parent == NULL) + return REG_NOERROR; + prev = node; + node = node->parent; + } + /* Go up while we have a node that is reached from the right. */ + while (node->right == prev || node->right == NULL); + node = node->right; + } +} + +static reg_errcode_t +preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node; + + for (node = root; ; ) + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Go to the left node, or up and to the right. 
*/ + if (node->left) + node = node->left; + else + { + bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + if (!node) + return REG_NOERROR; + } + node = node->right; + } + } +} + +/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell + re_search_internal to map the inner one's opr.idx to this one's. Adjust + backreferences as well. Requires a preorder visit. */ +static reg_errcode_t +optimize_subexps (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + + if (node->token.type == OP_BACK_REF && dfa->subexp_map) + { + int idx = node->token.opr.idx; + node->token.opr.idx = dfa->subexp_map[idx]; + dfa->used_bkref_map |= 1 << node->token.opr.idx; + } + + else if (node->token.type == SUBEXP + && node->left && node->left->token.type == SUBEXP) + { + int other_idx = node->left->token.opr.idx; + + node->left = node->left->left; + if (node->left) + node->left->parent = node; + + dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); + } + + return REG_NOERROR; +} + +/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation + of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. 
*/ +static reg_errcode_t +lower_subexps (void *extra, bin_tree_t *node) +{ + regex_t *preg = (regex_t *) extra; + reg_errcode_t err = REG_NOERROR; + + if (node->left && node->left->token.type == SUBEXP) + { + node->left = lower_subexp (&err, preg, node->left); + if (node->left) + node->left->parent = node; + } + if (node->right && node->right->token.type == SUBEXP) + { + node->right = lower_subexp (&err, preg, node->right); + if (node->right) + node->right->parent = node; + } + + return err; +} + +static bin_tree_t * +lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *body = node->left; + bin_tree_t *op, *cls, *tree1, *tree; + + if (preg->no_sub + /* We do not optimize empty subexpressions, because otherwise we may + have bad CONCAT nodes with NULL children. This is obviously not + very common, so we do not lose much. An example that triggers + this case is the sed "script" /\(\)/x. */ + && node->left != NULL + && (node->token.opr.idx >= BITSET_WORD_BITS + || !(dfa->used_bkref_map + & ((bitset_word_t) 1 << node->token.opr.idx)))) + return node->left; + + /* Convert the SUBEXP node to the concatenation of an + OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ + op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); + cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); + tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; + tree = create_tree (dfa, op, tree1, CONCAT); + if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + + op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; + op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; + return tree; +} + +/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton + nodes. Requires a postorder visit. 
*/ +static reg_errcode_t +calc_first (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + if (node->token.type == CONCAT) + { + node->first = node->left->first; + node->node_idx = node->left->node_idx; + } + else + { + node->first = node; + node->node_idx = re_dfa_add_node (dfa, node->token); + if (BE (node->node_idx == -1, 0)) + return REG_ESPACE; + } + return REG_NOERROR; +} + +/* Pass 2: compute NEXT on the tree. Preorder visit. */ +static reg_errcode_t +calc_next (void *extra, bin_tree_t *node) +{ + switch (node->token.type) + { + case OP_DUP_ASTERISK: + node->left->next = node; + break; + case CONCAT: + node->left->next = node->right->first; + node->right->next = node->next; + break; + default: + if (node->left) + node->left->next = node->next; + if (node->right) + node->right->next = node->next; + break; + } + return REG_NOERROR; +} + +/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */ +static reg_errcode_t +link_nfa_nodes (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + int idx = node->node_idx; + reg_errcode_t err = REG_NOERROR; + + switch (node->token.type) + { + case CONCAT: + break; + + case END_OF_RE: + assert (node->next == NULL); + break; + + case OP_DUP_ASTERISK: + case OP_ALT: + { + int left, right; + dfa->has_plural_match = 1; + if (node->left != NULL) + left = node->left->first->node_idx; + else + left = node->next->node_idx; + if (node->right != NULL) + right = node->right->first->node_idx; + else + right = node->next->node_idx; + assert (left > -1); + assert (right > -1); + err = re_node_set_init_2 (dfa->edests + idx, left, right); + } + break; + + case ANCHOR: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); + break; + + case OP_BACK_REF: + dfa->nexts[idx] = node->next->node_idx; + if (node->token.type == OP_BACK_REF) + re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); + break; + + default: + assert 
(!IS_EPSILON_NODE (node->token.type)); + dfa->nexts[idx] = node->next->node_idx; + break; + } + + return err; +} + +/* Duplicate the epsilon closure of the node ROOT_NODE. + Note that duplicated nodes have constraint INIT_CONSTRAINT in addition + to their own constraint. */ + +static reg_errcode_t +internal_function +duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, + int root_node, unsigned int init_constraint) +{ + int org_node, clone_node, ret; + unsigned int constraint = init_constraint; + for (org_node = top_org_node, clone_node = top_clone_node;;) + { + int org_dest, clone_dest; + if (dfa->nodes[org_node].type == OP_BACK_REF) + { + /* If the back reference epsilon-transit, its destination must + also have the constraint. Then duplicate the epsilon closure + of the destination of the back reference, and store it in + edests of the back reference. */ + org_dest = dfa->nexts[org_node]; + re_node_set_empty (dfa->edests + clone_node); + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + dfa->nexts[clone_node] = dfa->nexts[org_node]; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else if (dfa->edests[org_node].nelem == 0) + { + /* In case of the node can't epsilon-transit, don't duplicate the + destination and store the original destination as the + destination of the node. */ + dfa->nexts[clone_node] = dfa->nexts[org_node]; + break; + } + else if (dfa->edests[org_node].nelem == 1) + { + /* In case of the node can epsilon-transit, and it has only one + destination. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + if (dfa->nodes[org_node].type == ANCHOR) + { + /* In case of the node has another constraint, append it. 
*/ + if (org_node == root_node && clone_node != org_node) + { + /* ...but if the node is root_node itself, it means the + epsilon closure have a loop, then tie it to the + destination of the root_node. */ + ret = re_node_set_insert (dfa->edests + clone_node, + org_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + break; + } + constraint |= dfa->nodes[org_node].opr.ctx_type; + } + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else /* dfa->edests[org_node].nelem == 2 */ + { + /* In case of the node can epsilon-transit, and it has two + destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* Search for a duplicated node which satisfies the constraint. */ + clone_dest = search_duplicated_node (dfa, org_dest, constraint); + if (clone_dest == -1) + { + /* There are no such a duplicated node, create a new one. */ + reg_errcode_t err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + err = duplicate_node_closure (dfa, org_dest, clone_dest, + root_node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + { + /* There are a duplicated node which satisfy the constraint, + use it to avoid infinite loop. 
*/ + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + + org_dest = dfa->edests[org_node].elems[1]; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + org_node = org_dest; + clone_node = clone_dest; + } + return REG_NOERROR; +} + +/* Search for a node which is duplicated from the node ORG_NODE, and + satisfies the constraint CONSTRAINT. */ + +static int +search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint) +{ + int idx; + for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) + { + if (org_node == dfa->org_indices[idx] + && constraint == dfa->nodes[idx].constraint) + return idx; /* Found. */ + } + return -1; /* Not found. */ +} + +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + Return the index of the new node, or -1 if insufficient storage is + available. */ + +static int +duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) +{ + int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); + if (BE (dup_idx != -1, 1)) + { + dfa->nodes[dup_idx].constraint = constraint; + if (dfa->nodes[org_idx].type == ANCHOR) + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; + dfa->nodes[dup_idx].duplicated = 1; + + /* Store the index of the original node. 
*/ + dfa->org_indices[dup_idx] = org_idx; + } + return dup_idx; +} + +static reg_errcode_t +calc_inveclosure (re_dfa_t *dfa) +{ + int src, idx, ret; + for (idx = 0; idx < dfa->nodes_len; ++idx) + re_node_set_init_empty (dfa->inveclosures + idx); + + for (src = 0; src < dfa->nodes_len; ++src) + { + int *elems = dfa->eclosures[src].elems; + for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) + { + ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + } + + return REG_NOERROR; +} + +/* Calculate "eclosure" for all the node in DFA. */ + +static reg_errcode_t +calc_eclosure (re_dfa_t *dfa) +{ + int node_idx, incomplete; +#ifdef DEBUG + assert (dfa->nodes_len > 0); +#endif + incomplete = 0; + /* For each nodes, calculate epsilon closure. */ + for (node_idx = 0; ; ++node_idx) + { + reg_errcode_t err; + re_node_set eclosure_elem; + if (node_idx == dfa->nodes_len) + { + if (!incomplete) + break; + incomplete = 0; + node_idx = 0; + } + +#ifdef DEBUG + assert (dfa->eclosures[node_idx].nelem != -1); +#endif + + /* If we have already calculated, skip it. */ + if (dfa->eclosures[node_idx].nelem != 0) + continue; + /* Calculate epsilon closure of `node_idx'. */ + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (dfa->eclosures[node_idx].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + return REG_NOERROR; +} + +/* Calculate epsilon closure of NODE. */ + +static reg_errcode_t +calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) +{ + reg_errcode_t err; + unsigned int constraint; + int i, incomplete; + re_node_set eclosure; + incomplete = 0; + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* This indicates that we are calculating this node now. + We reference this value to avoid infinite loop. 
*/ + dfa->eclosures[node].nelem = -1; + + constraint = ((dfa->nodes[node].type == ANCHOR) + ? dfa->nodes[node].opr.ctx_type : 0); + /* If the current node has constraints, duplicate all nodes. + Since they must inherit the constraints. */ + if (constraint + && dfa->edests[node].nelem + && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) + { + err = duplicate_node_closure (dfa, node, node, node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Expand each epsilon destination nodes. */ + if (IS_EPSILON_NODE(dfa->nodes[node].type)) + for (i = 0; i < dfa->edests[node].nelem; ++i) + { + re_node_set eclosure_elem; + int edest = dfa->edests[node].elems[i]; + /* If calculating the epsilon closure of `edest' is in progress, + return intermediate result. */ + if (dfa->eclosures[edest].nelem == -1) + { + incomplete = 1; + continue; + } + /* If we haven't calculated the epsilon closure of `edest' yet, + calculate now. Otherwise use calculated epsilon closure. */ + if (dfa->eclosures[edest].nelem == 0) + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + eclosure_elem = dfa->eclosures[edest]; + /* Merge the epsilon closure of `edest'. */ + re_node_set_merge (&eclosure, &eclosure_elem); + /* If the epsilon closure of `edest' is incomplete, + the epsilon closure of this node is also incomplete. */ + if (dfa->eclosures[edest].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + + /* Epsilon closures include itself. */ + re_node_set_insert (&eclosure, node); + if (incomplete && !root) + dfa->eclosures[node].nelem = 0; + else + dfa->eclosures[node] = eclosure; + *new_set = eclosure; + return REG_NOERROR; +} + +/* Functions for token which are used in the parser. */ + +/* Fetch a token from INPUT. + We must not use this function inside bracket expressions. 
*/ + +static void +internal_function +fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) +{ + re_string_skip_bytes (input, peek_token (result, input, syntax)); +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function inside bracket expressions. */ + +static int +internal_function +peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + + c = re_string_peek_byte (input, 0); + token->opr.c = c; + + token->word_char = 0; +#ifdef RE_ENABLE_I18N + token->mb_partial = 0; + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + token->mb_partial = 1; + return 1; + } +#endif + if (c == '\\') + { + unsigned char c2; + if (re_string_cur_idx (input) + 1 >= re_string_length (input)) + { + token->type = BACK_SLASH; + return 1; + } + + c2 = re_string_peek_byte_case (input, 1); + token->opr.c = c2; + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, + re_string_cur_idx (input) + 1); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (c2) != 0; + + switch (c2) + { + case '|': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (!(syntax & RE_NO_BK_REFS)) + { + token->type = OP_BACK_REF; + token->opr.idx = c2 - '1'; + } + break; + case '<': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_FIRST; + } + break; + case '>': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_LAST; + } + break; + case 'b': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = 
WORD_DELIM; + } + break; + case 'B': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = NOT_WORD_DELIM; + } + break; + case 'w': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_WORD; + break; + case 'W': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTWORD; + break; + case 's': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_SPACE; + break; + case 'S': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTSPACE; + break; + case '`': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_FIRST; + } + break; + case '\'': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_LAST; + } + break; + case '(': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_CLOSE_SUBEXP; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_CLOSE_DUP_NUM; + break; + default: + break; + } + return 2; + } + + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (token->opr.c); + + switch (c) + { + case '\n': + if (syntax & RE_NEWLINE_ALT) + token->type = OP_ALT; + break; + case '|': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '*': + token->type = OP_DUP_ASTERISK; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & 
RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_CLOSE_DUP_NUM; + break; + case '(': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_CLOSE_SUBEXP; + break; + case '[': + token->type = OP_OPEN_BRACKET; + break; + case '.': + token->type = OP_PERIOD; + break; + case '^': + if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && + re_string_cur_idx (input) != 0) + { + char prev = re_string_peek_byte (input, -1); + if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_FIRST; + break; + case '$': + if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && + re_string_cur_idx (input) + 1 != re_string_length (input)) + { + re_token_t next; + re_string_skip_bytes (input, 1); + peek_token (&next, input, syntax); + re_string_skip_bytes (input, -1); + if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_LAST; + break; + default: + break; + } + return 1; +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function out of bracket expressions. 
*/ + +static int +internal_function +peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + c = re_string_peek_byte (input, 0); + token->opr.c = c; + +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + return 1; + } +#endif /* RE_ENABLE_I18N */ + + if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) + && re_string_cur_idx (input) + 1 < re_string_length (input)) + { + /* In this case, '\' escape a character. */ + unsigned char c2; + re_string_skip_bytes (input, 1); + c2 = re_string_peek_byte (input, 0); + token->opr.c = c2; + token->type = CHARACTER; + return 1; + } + if (c == '[') /* '[' is a special char in a bracket exps. */ + { + unsigned char c2; + int token_len; + if (re_string_cur_idx (input) + 1 < re_string_length (input)) + c2 = re_string_peek_byte (input, 1); + else + c2 = 0; + token->opr.c = c2; + token_len = 2; + switch (c2) + { + case '.': + token->type = OP_OPEN_COLL_ELEM; + break; + case '=': + token->type = OP_OPEN_EQUIV_CLASS; + break; + case ':': + if (syntax & RE_CHAR_CLASSES) + { + token->type = OP_OPEN_CHAR_CLASS; + break; + } + /* else fall through. */ + default: + token->type = CHARACTER; + token->opr.c = c; + token_len = 1; + break; + } + return token_len; + } + switch (c) + { + case '-': + token->type = OP_CHARSET_RANGE; + break; + case ']': + token->type = OP_CLOSE_BRACKET; + break; + case '^': + token->type = OP_NON_MATCH_LIST; + break; + default: + token->type = CHARACTER; + } + return 1; +} + +/* Functions for parser. */ + +/* Entry point of the parser. + Parse the regular expression REGEXP and return the structure tree. + If an error is occured, ERR is set by error code, and return NULL. + This function build the following tree, from regular expression : + CAT + / \ + / \ + EOR + + CAT means concatenation. 
+ EOR means end of regular expression. */ + +static bin_tree_t * +parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, + reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *eor, *root; + re_token_t current_token; + dfa->syntax = syntax; + fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); + tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + eor = create_tree (dfa, NULL, NULL, END_OF_RE); + if (tree != NULL) + root = create_tree (dfa, tree, eor, CONCAT); + else + root = eor; + if (BE (eor == NULL || root == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + return root; +} + +/* This function build the following tree, from regular expression + |: + ALT + / \ + / \ + + + ALT means alternative, which represents the operator `|'. */ + +static bin_tree_t * +parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *branch = NULL; + tree = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type == OP_ALT) + { + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + if (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + branch = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && branch == NULL, 0)) + return NULL; + } + else + branch = NULL; + tree = create_tree (dfa, tree, branch, OP_ALT); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + return tree; +} + +/* This function build the following tree, from regular expression + : + CAT + / \ + / \ + + + CAT means concatenation. 
*/ + +static bin_tree_t * +parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + bin_tree_t *tree, *exp; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + tree = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + exp = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && exp == NULL, 0)) + { + return NULL; + } + if (tree != NULL && exp != NULL) + { + tree = create_tree (dfa, tree, exp, CONCAT); + if (tree == NULL) + { + *err = REG_ESPACE; + return NULL; + } + } + else if (tree == NULL) + tree = exp; + /* Otherwise exp == NULL, we don't need to create new tree. */ + } + return tree; +} + +/* This function build the following tree, from regular expression a*: + * + | + a +*/ + +static bin_tree_t * +parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + switch (token->type) + { + case CHARACTER: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (!re_string_eoi (regexp) + && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) + { + bin_tree_t *mbc_remain; + fetch_token (token, regexp, syntax); + mbc_remain = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree, mbc_remain, CONCAT); + if (BE (mbc_remain == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + } +#endif + break; + case OP_OPEN_SUBEXP: + tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case 
OP_OPEN_BRACKET: + tree = parse_bracket_exp (regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_BACK_REF: + if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) + { + *err = REG_ESUBREG; + return NULL; + } + dfa->used_bkref_map |= 1 << token->opr.idx; + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + ++dfa->nbackref; + dfa->has_mb_node = 1; + break; + case OP_OPEN_DUP_NUM: + if (syntax & RE_CONTEXT_INVALID_DUP) + { + *err = REG_BADRPT; + return NULL; + } + /* FALLTHROUGH */ + case OP_DUP_ASTERISK: + case OP_DUP_PLUS: + case OP_DUP_QUESTION: + if (syntax & RE_CONTEXT_INVALID_OPS) + { + *err = REG_BADRPT; + return NULL; + } + else if (syntax & RE_CONTEXT_INDEP_OPS) + { + fetch_token (token, regexp, syntax); + return parse_expression (regexp, preg, token, syntax, nest, err); + } + /* else fall through */ + case OP_CLOSE_SUBEXP: + if ((token->type == OP_CLOSE_SUBEXP) && + !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) + { + *err = REG_ERPAREN; + return NULL; + } + /* else fall through */ + case OP_CLOSE_DUP_NUM: + /* We treat it as a normal character. */ + + /* Then we can these characters as normal characters. */ + token->type = CHARACTER; + /* mb_partial and word_char bits should be initialized already + by peek_token. 
*/ + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + break; + case ANCHOR: + if ((token->opr.ctx_type + & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) + && dfa->word_ops_used == 0) + init_word_char (dfa); + if (token->opr.ctx_type == WORD_DELIM + || token->opr.ctx_type == NOT_WORD_DELIM) + { + bin_tree_t *tree_first, *tree_last; + if (token->opr.ctx_type == WORD_DELIM) + { + token->opr.ctx_type = WORD_FIRST; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = WORD_LAST; + } + else + { + token->opr.ctx_type = INSIDE_WORD; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = INSIDE_NOTWORD; + } + tree_last = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree_first, tree_last, OP_ALT); + if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + else + { + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + /* We must return here, since ANCHORs can't be followed + by repetition operators. + eg. RE"^*" is invalid or "", + it must not be "". 
*/ + fetch_token (token, regexp, syntax); + return tree; + case OP_PERIOD: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + if (dfa->mb_cur_max > 1) + dfa->has_mb_node = 1; + break; + case OP_WORD: + case OP_NOTWORD: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "alnum", + (const unsigned char *) "_", + token->type == OP_NOTWORD, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_SPACE: + case OP_NOTSPACE: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "space", + (const unsigned char *) "", + token->type == OP_NOTSPACE, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_ALT: + case END_OF_RE: + return NULL; + case BACK_SLASH: + *err = REG_EESCAPE; + return NULL; + default: + /* Must not happen? */ +#ifdef DEBUG + assert (0); +#endif + return NULL; + } + fetch_token (token, regexp, syntax); + + while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS + || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) + { + tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + /* In BRE consecutive duplications are not allowed. 
*/ + if ((syntax & RE_CONTEXT_INVALID_DUP) + && (token->type == OP_DUP_ASTERISK + || token->type == OP_OPEN_DUP_NUM)) + { + *err = REG_BADRPT; + return NULL; + } + } + + return tree; +} + +/* This function build the following tree, from regular expression + (): + SUBEXP + | + +*/ + +static bin_tree_t * +parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + size_t cur_nsub; + cur_nsub = preg->re_nsub++; + + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + + /* The subexpression may be a null string. */ + if (token->type == OP_CLOSE_SUBEXP) + tree = NULL; + else + { + tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); + if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) + *err = REG_EPAREN; + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + + if (cur_nsub <= '9' - '1') + dfa->completed_bkref_map |= 1 << cur_nsub; + + tree = create_tree (dfa, tree, NULL, SUBEXP); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + tree->token.opr.idx = cur_nsub; + return tree; +} + +/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ + +static bin_tree_t * +parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) +{ + bin_tree_t *tree = NULL, *old_tree = NULL; + int i, start, end, start_idx = re_string_cur_idx (regexp); + re_token_t start_token = *token; + + if (token->type == OP_OPEN_DUP_NUM) + { + end = 0; + start = fetch_number (regexp, token, syntax); + if (start == -1) + { + if (token->type == CHARACTER && token->opr.c == ',') + start = 0; /* We treat "{,m}" as "{0,m}". */ + else + { + *err = REG_BADBR; /* {} is invalid. */ + return NULL; + } + } + if (BE (start != -2, 1)) + { + /* We treat "{n}" as "{n,n}". */ + end = ((token->type == OP_CLOSE_DUP_NUM) ? 
start + : ((token->type == CHARACTER && token->opr.c == ',') + ? fetch_number (regexp, token, syntax) : -2)); + } + if (BE (start == -2 || end == -2, 0)) + { + /* Invalid sequence. */ + if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) + { + if (token->type == END_OF_RE) + *err = REG_EBRACE; + else + *err = REG_BADBR; + + return NULL; + } + + /* If the syntax bit is set, rollback. */ + re_string_set_index (regexp, start_idx); + *token = start_token; + token->type = CHARACTER; + /* mb_partial and word_char bits should be already initialized by + peek_token. */ + return elem; + } + + if (BE (end != -1 && start > end, 0)) + { + /* First number greater than second. */ + *err = REG_BADBR; + return NULL; + } + } + else + { + start = (token->type == OP_DUP_PLUS) ? 1 : 0; + end = (token->type == OP_DUP_QUESTION) ? 1 : -1; + } + + fetch_token (token, regexp, syntax); + + if (BE (elem == NULL, 0)) + return NULL; + if (BE (start == 0 && end == 0, 0)) + { + postorder (elem, free_tree, NULL); + return NULL; + } + + /* Extract "{n,m}" to "...{0,}". */ + if (BE (start > 0, 0)) + { + tree = elem; + for (i = 2; i <= start; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (start == end) + return tree; + + /* Duplicate ELEM before it is marked optional. */ + elem = duplicate_tree (elem, dfa); + old_tree = tree; + } + else + old_tree = NULL; + + if (elem->token.type == SUBEXP) + postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); + + tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + + /* This loop is actually executed only when end != -1, + to rewrite {0,n} as ((...?)?)?... We have + already created the start+1-th copy. 
*/ + for (i = start + 2; i <= end; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + + tree = create_tree (dfa, tree, NULL, OP_ALT); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (old_tree) + tree = create_tree (dfa, old_tree, tree, CONCAT); + + return tree; + + parse_dup_op_espace: + *err = REG_ESPACE; + return NULL; +} + +/* Size of the names for collating symbol/equivalence_class/character_class. + I'm not sure, but maybe enough. */ +#define BRACKET_NAME_BUF_SIZE 32 + +#ifndef _LIBC + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) +# else /* not RE_ENABLE_I18N */ +build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, + bracket_elem_t *end_elem) +# endif /* not RE_ENABLE_I18N */ +{ + unsigned int start_ch, end_ch; + /* Equivalence Classes and Character Classes can't be a range start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + /* We can handle no multi character collating elements without libc + support. 
*/ + if (BE ((start_elem->type == COLL_SYM + && strlen ((char *) start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM + && strlen ((char *) end_elem->opr.name) > 1), 0)) + return REG_ECOLLATE; + +# ifdef RE_ENABLE_I18N + { + wchar_t wc; + wint_t start_wc; + wint_t end_wc; + wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + + start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? __btowc (start_ch) : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? __btowc (end_ch) : end_elem->opr.wch); + if (start_wc == WEOF || end_wc == WEOF) + return REG_ECOLLATE; + cmp_buf[0] = start_wc; + cmp_buf[4] = end_wc; + if (wcscoll (cmp_buf, cmp_buf + 4) > 0) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, for !_LIBC we have no collation elements: if the + character set is single byte, the single byte character set + that we build below suffices. parse_bracket_exp passes + no MBCSET if dfa->mb_cur_max == 1. */ + if (mbcset) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. 
*/ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; + } + + /* Build the table for single byte characters. */ + for (wc = 0; wc < SBC_MAX; ++wc) + { + cmp_buf[2] = wc; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + bitset_set (sbcset, wc); + } + } +# else /* not RE_ENABLE_I18N */ + { + unsigned int ch; + start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + if (start_ch > end_ch) + return REG_ERANGE; + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ++ch) + if (start_ch <= ch && ch <= end_ch) + bitset_set (sbcset, ch); + } +# endif /* not RE_ENABLE_I18N */ + return REG_NOERROR; +} +#endif /* not _LIBC */ + +#ifndef _LIBC +/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument since we may update it. 
*/ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) +# else /* not RE_ENABLE_I18N */ +build_collating_symbol (bitset_t sbcset, const unsigned char *name) +# endif /* not RE_ENABLE_I18N */ +{ + size_t name_len = strlen ((const char *) name); + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } +} +#endif /* not _LIBC */ + +/* This function parse bracket expression like "[abc]", "[a-c]", + "[[.a-a.]]" etc. */ + +static bin_tree_t * +parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err) +{ +#ifdef _LIBC + const unsigned char *collseqmb; + const char *collseqwc; + uint32_t nrules; + int32_t table_size; + const int32_t *symb_table; + const unsigned char *extra; + + /* Local function for parse_bracket_exp used in _LIBC environement. + Seek the collating symbol entry correspondings to NAME. + Return the index of the symbol in the SYMB_TABLE. */ + + auto inline int32_t + __attribute ((always_inline)) + seek_collating_symbol_entry (name, name_len) + const unsigned char *name; + size_t name_len; + { + int32_t hash = elem_hash ((const char *) name, name_len); + int32_t elem = hash % table_size; + if (symb_table[2 * elem] != 0) + { + int32_t second = hash % (table_size - 2) + 1; + + do + { + /* First compare the hashing value. */ + if (symb_table[2 * elem] == hash + /* Compare the length of the name. */ + && name_len == extra[symb_table[2 * elem + 1]] + /* Compare the name. */ + && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], + name_len) == 0) + { + /* Yep, this is the entry. */ + break; + } + + /* Next entry. */ + elem += second; + } + while (symb_table[2 * elem] != 0); + } + return elem; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. 
+ Look up the collation sequence value of BR_ELEM. + Return the value if succeeded, UINT_MAX otherwise. */ + + auto inline unsigned int + __attribute ((always_inline)) + lookup_collation_sequence_value (br_elem) + bracket_elem_t *br_elem; + { + if (br_elem->type == SB_CHAR) + { + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + return collseqmb[br_elem->opr.ch]; + else + { + wint_t wc = __btowc (br_elem->opr.ch); + return __collseq_table_lookup (collseqwc, wc); + } + } + else if (br_elem->type == MB_CHAR) + { + return __collseq_table_lookup (collseqwc, br_elem->opr.wch); + } + else if (br_elem->type == COLL_SYM) + { + size_t sym_name_len = strlen ((char *) br_elem->opr.name); + if (nrules != 0) + { + int32_t elem, idx; + elem = seek_collating_symbol_entry (br_elem->opr.name, + sym_name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + /* Skip the byte sequence of the collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the multibyte collation sequence value. */ + idx += sizeof (unsigned int); + /* Skip the wide char sequence of the collating element. */ + idx += sizeof (unsigned int) * + (1 + *(unsigned int *) (extra + idx)); + /* Return the collation sequence value. */ + return *(unsigned int *) (extra + idx); + } + else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + { + /* No valid character. Match it as a single byte + character. */ + return collseqmb[br_elem->opr.name[0]]; + } + } + else if (sym_name_len == 1) + return collseqmb[br_elem->opr.name[0]]; + } + return UINT_MAX; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. 
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) + re_charset_t *mbcset; + int *range_alloc; + bitset_t sbcset; + bracket_elem_t *start_elem, *end_elem; + { + unsigned int ch; + uint32_t start_collseq; + uint32_t end_collseq; + + /* Equivalence Classes and Character Classes can't be a range + start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + start_collseq = lookup_collation_sequence_value (start_elem); + end_collseq = lookup_collation_sequence_value (end_elem); + /* Check start/end collation sequence values. */ + if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) + return REG_ECOLLATE; + if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, if we have no collation elements, and the character set + is single byte, the single byte character set that we + build below suffices. */ + if (nrules > 0 || dfa->mb_cur_max > 1) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + uint32_t *new_array_start; + uint32_t *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. 
*/ + new_nranges = 2 * mbcset->nranges + 1; + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_collseq; + mbcset->range_ends[mbcset->nranges++] = end_collseq; + } + + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ch++) + { + uint32_t ch_collseq; + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + ch_collseq = collseqmb[ch]; + else + ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); + if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) + bitset_set (sbcset, ch); + } + return REG_NOERROR; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument sinse we may update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) + re_charset_t *mbcset; + int *coll_sym_alloc; + bitset_t sbcset; + const unsigned char *name; + { + int32_t elem, idx; + size_t name_len = strlen ((const char *) name); + if (nrules != 0) + { + elem = seek_collating_symbol_entry (name, name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + } + else if (symb_table[2 * elem] == 0 && name_len == 1) + { + /* No valid character, treat it as a normal + character. 
*/ + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + else + return REG_ECOLLATE; + + /* Got valid collation sequence, add it as a new entry. */ + /* Check the space of the arrays. */ + if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->ncoll_syms is 0. */ + int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + /* Use realloc since mbcset->coll_syms is NULL + if *alloc == 0. */ + int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, + new_coll_sym_alloc); + if (BE (new_coll_syms == NULL, 0)) + return REG_ESPACE; + mbcset->coll_syms = new_coll_syms; + *coll_sym_alloc = new_coll_sym_alloc; + } + mbcset->coll_syms[mbcset->ncoll_syms++] = idx; + return REG_NOERROR; + } + else + { + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + } + } +#endif + + re_token_t br_token; + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; + int equiv_class_alloc = 0, char_class_alloc = 0; +#endif /* not RE_ENABLE_I18N */ + int non_match = 0; + bin_tree_t *work_tree; + int token_len; + int first_round = 1; +#ifdef _LIBC + collseqmb = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules) + { + /* + if (MB_CUR_MAX > 1) + */ + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); + symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_TABLEMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_EXTRAMB); + } +#endif + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL 
|| mbcset == NULL, 0)) +#else + if (BE (sbcset == NULL, 0)) +#endif /* RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_NON_MATCH_LIST) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + non_match = 1; + if (syntax & RE_HAT_LISTS_NOT_NEWLINE) + bitset_set (sbcset, '\0'); + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + } + + /* We treat the first ']' as a normal character. */ + if (token->type == OP_CLOSE_BRACKET) + token->type = CHARACTER; + + while (1) + { + bracket_elem_t start_elem, end_elem; + unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; + unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; + reg_errcode_t ret; + int token_len2 = 0, is_range_exp = 0; + re_token_t token2; + + start_elem.opr.name = start_name_buf; + ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, + syntax, first_round); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + first_round = 0; + + /* Get information about the next token. We need it in any case. */ + token_len = peek_token_bracket (token, regexp, syntax); + + /* Do not check for ranges if we know they are not allowed. */ + if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) + { + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CHARSET_RANGE) + { + re_string_skip_bytes (regexp, token_len); /* Skip '-'. 
*/ + token_len2 = peek_token_bracket (&token2, regexp, syntax); + if (BE (token2.type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token2.type == OP_CLOSE_BRACKET) + { + /* We treat the last '-' as a normal character. */ + re_string_skip_bytes (regexp, -token_len); + token->type = CHARACTER; + } + else + is_range_exp = 1; + } + } + + if (is_range_exp == 1) + { + end_elem.opr.name = end_name_buf; + ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, + dfa, syntax, 1); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + + token_len = peek_token_bracket (token, regexp, syntax); + +#ifdef _LIBC + *err = build_range_exp (sbcset, mbcset, &range_alloc, + &start_elem, &end_elem); +#else +# ifdef RE_ENABLE_I18N + *err = build_range_exp (sbcset, + dfa->mb_cur_max > 1 ? mbcset : NULL, + &range_alloc, &start_elem, &end_elem); +# else + *err = build_range_exp (sbcset, &start_elem, &end_elem); +# endif +#endif /* RE_ENABLE_I18N */ + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + } + else + { + switch (start_elem.type) + { + case SB_CHAR: + bitset_set (sbcset, start_elem.opr.ch); + break; +#ifdef RE_ENABLE_I18N + case MB_CHAR: + /* Check whether the array has enough space. */ + if (BE (mbchar_alloc == mbcset->nmbchars, 0)) + { + wchar_t *new_mbchars; + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nmbchars is 0. */ + mbchar_alloc = 2 * mbcset->nmbchars + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + new_mbchars = re_realloc (mbcset->mbchars, wchar_t, + mbchar_alloc); + if (BE (new_mbchars == NULL, 0)) + goto parse_bracket_exp_espace; + mbcset->mbchars = new_mbchars; + } + mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; + break; +#endif /* RE_ENABLE_I18N */ + case EQUIV_CLASS: + *err = build_equiv_class (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &equiv_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case COLL_SYM: + *err = build_collating_symbol (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &coll_sym_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case CHAR_CLASS: + *err = build_charclass (regexp->trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &char_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name, syntax); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + default: + assert (0); + break; + } + } + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CLOSE_BRACKET) + break; + } + + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); + + if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes + || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes + || mbcset->non_match))) + { + bin_tree_t *mbc_tree; + int sbc_idx; + /* Build a tree for complex bracket. 
*/ + dfa->has_mb_node = 1; + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto parse_bracket_exp_espace; + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) + if (sbcset[sbc_idx]) + break; + /* If there are no bits set in sbcset, there is no point + of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ + if (sbc_idx < BITSET_WORDS) + { + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + + /* Then join them by ALT node. */ + work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + else + { + re_free (sbcset); + work_tree = mbc_tree; + } + } + else +#endif /* not RE_ENABLE_I18N */ + { +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + return work_tree; + + parse_bracket_exp_espace: + *err = REG_ESPACE; + parse_bracket_exp_free_return: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + return NULL; +} + +/* Parse an element in the bracket expression. 
*/ + +static reg_errcode_t +parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token, int token_len, re_dfa_t *dfa, + reg_syntax_t syntax, int accept_hyphen) +{ +#ifdef RE_ENABLE_I18N + int cur_char_size; + cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); + if (cur_char_size > 1) + { + elem->type = MB_CHAR; + elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); + re_string_skip_bytes (regexp, cur_char_size); + return REG_NOERROR; + } +#endif /* RE_ENABLE_I18N */ + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS + || token->type == OP_OPEN_EQUIV_CLASS) + return parse_bracket_symbol (elem, regexp, token); + if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) + { + /* A '-' must only appear as anything but a range indicator before + the closing bracket. Everything else is an error. */ + re_token_t token2; + (void) peek_token_bracket (&token2, regexp, syntax); + if (token2.type != OP_CLOSE_BRACKET) + /* The actual error value is not standardized since this whole + case is undefined. But ERANGE makes good sense. */ + return REG_ERANGE; + } + elem->type = SB_CHAR; + elem->opr.ch = token->opr.c; + return REG_NOERROR; +} + +/* Parse a bracket symbol in the bracket expression. Bracket symbols are + such as [::], [..], and + [==]. 
*/ + +static reg_errcode_t +parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token) +{ + unsigned char ch, delim = token->opr.c; + int i = 0; + if (re_string_eoi(regexp)) + return REG_EBRACK; + for (;; ++i) + { + if (i >= BRACKET_NAME_BUF_SIZE) + return REG_EBRACK; + if (token->type == OP_OPEN_CHAR_CLASS) + ch = re_string_fetch_byte_case (regexp); + else + ch = re_string_fetch_byte (regexp); + if (re_string_eoi(regexp)) + return REG_EBRACK; + if (ch == delim && re_string_peek_byte (regexp, 0) == ']') + break; + elem->opr.name[i] = ch; + } + re_string_skip_bytes (regexp, 1); + elem->opr.name[i] = '\0'; + switch (token->type) + { + case OP_OPEN_COLL_ELEM: + elem->type = COLL_SYM; + break; + case OP_OPEN_EQUIV_CLASS: + elem->type = EQUIV_CLASS; + break; + case OP_OPEN_CHAR_CLASS: + elem->type = CHAR_CLASS; + break; + default: + break; + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the equivalence class which is represented by NAME. + The result are written to MBCSET and SBCSET. + EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, + int *equiv_class_alloc, const unsigned char *name) +#else /* not RE_ENABLE_I18N */ +build_equiv_class (bitset_t sbcset, const unsigned char *name) +#endif /* not RE_ENABLE_I18N */ +{ +#ifdef _LIBC + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + const int32_t *table, *indirect; + const unsigned char *weights, *extra, *cp; + unsigned char char_buf[2]; + int32_t idx1, idx2; + unsigned int ch; + size_t len; + /* This #include defines a local function! */ +# include + /* Calculate the index for equivalence class. 
*/ + cp = name; + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + idx1 = findidx (&cp); + if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) + /* This isn't a valid character. */ + return REG_ECOLLATE; + + /* Build single byte matcing table for this equivalence class. */ + char_buf[1] = (unsigned char) '\0'; + len = weights[idx1]; + for (ch = 0; ch < SBC_MAX; ++ch) + { + char_buf[0] = ch; + cp = char_buf; + idx2 = findidx (&cp); +/* + idx2 = table[ch]; +*/ + if (idx2 == 0) + /* This isn't a valid character. */ + continue; + if (len == weights[idx2]) + { + int cnt = 0; + while (cnt <= len && + weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) + ++cnt; + + if (cnt > len) + bitset_set (sbcset, ch); + } + } + /* Check whether the array has enough space. */ + if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nequiv_classes is 0. */ + int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + /* Use realloc since the array is NULL if *alloc == 0. */ + int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, + int32_t, + new_equiv_class_alloc); + if (BE (new_equiv_classes == NULL, 0)) + return REG_ESPACE; + mbcset->equiv_classes = new_equiv_classes; + *equiv_class_alloc = new_equiv_class_alloc; + } + mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; + } + else +#endif /* _LIBC */ + { + if (BE (strlen ((const char *) name) != 1, 0)) + return REG_ECOLLATE; + bitset_set (sbcset, *name); + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the character class which is represented by NAME. + The result are written to MBCSET and SBCSET. 
+ CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, + const unsigned char *class_name, reg_syntax_t syntax) +#else /* not RE_ENABLE_I18N */ +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + const unsigned char *class_name, reg_syntax_t syntax) +#endif /* not RE_ENABLE_I18N */ +{ + int i; + const char *name = (const char *) class_name; + + /* In case of REG_ICASE "upper" and "lower" match the both of + upper and lower cases. */ + if ((syntax & RE_ICASE) + && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) + name = "alpha"; + +#ifdef RE_ENABLE_I18N + /* Check the space of the arrays. */ + if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nchar_classes is 0. */ + int new_char_class_alloc = 2 * mbcset->nchar_classes + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/
+      wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
+                                               new_char_class_alloc);
+      if (BE (new_char_classes == NULL, 0))
+        return REG_ESPACE;
+      mbcset->char_classes = new_char_classes;
+      *char_class_alloc = new_char_class_alloc;
+    }
+  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+#define BUILD_CHARCLASS_LOOP(ctype_func)        \
+  do {                                          \
+    if (BE (trans != NULL, 0))                  \
+      {                                         \
+        for (i = 0; i < SBC_MAX; ++i)           \
+          if (ctype_func (i))                   \
+            bitset_set (sbcset, trans[i]);      \
+      }                                         \
+    else                                        \
+      {                                         \
+        for (i = 0; i < SBC_MAX; ++i)           \
+          if (ctype_func (i))                   \
+            bitset_set (sbcset, i);             \
+      }                                         \
+  } while (0)
+
+  if (strcmp (name, "alnum") == 0)
+    BUILD_CHARCLASS_LOOP (isalnum);
+  else if (strcmp (name, "cntrl") == 0)
+    BUILD_CHARCLASS_LOOP (iscntrl);
+  else if (strcmp (name, "lower") == 0)
+    BUILD_CHARCLASS_LOOP (islower);
+  else if (strcmp (name, "space") == 0)
+    BUILD_CHARCLASS_LOOP (isspace);
+  else if (strcmp (name, "alpha") == 0)
+    BUILD_CHARCLASS_LOOP (isalpha);
+  else if (strcmp (name, "digit") == 0)
+    BUILD_CHARCLASS_LOOP (isdigit);
+  else if (strcmp (name, "print") == 0)
+    BUILD_CHARCLASS_LOOP (isprint);
+  else if (strcmp (name, "upper") == 0)
+    BUILD_CHARCLASS_LOOP (isupper);
+  else if (strcmp (name, "blank") == 0)
+    BUILD_CHARCLASS_LOOP (isblank);
+  else if (strcmp (name, "graph") == 0)
+    BUILD_CHARCLASS_LOOP (isgraph);
+  else if (strcmp (name, "punct") == 0)
+    BUILD_CHARCLASS_LOOP (ispunct);
+  else if (strcmp (name, "xdigit") == 0)
+    BUILD_CHARCLASS_LOOP (isxdigit);
+  else
+    return REG_ECTYPE;
+
+  return REG_NOERROR;
+}
+
+/* Build the parse tree for a character-class operator (the in-code note
+   below mentions \w as one user).
+
+   CLASS_NAME is the POSIX class name handed to build_charclass, EXTRA is a
+   NUL-terminated list of additional single-byte characters to add to the
+   set, and a nonzero NON_MATCH complements the resulting set.
+
+   Returns the new tree.  On failure returns NULL, stores the reason in *ERR
+   (REG_ESPACE, or the error reported by build_charclass) and releases every
+   set allocated here.  */
+
+static bin_tree_t *
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+                    const unsigned char *class_name,
+                    const unsigned char *extra, int non_match,
+                    reg_errcode_t *err)
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+
+  /* Allocate one set at a time so that a failure of the second allocation
+     can release the first instead of leaking it.  */
+  sbcset = (re_bitset_ptr_t) calloc (1, sizeof (bitset_t));
+  if (BE (sbcset == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (1, sizeof (re_charset_t));
+  if (BE (mbcset == NULL, 0))
+    {
+      re_free (sbcset);
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  if (non_match)
+    mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+
+  /* We don't care the syntax in this case.  */
+  ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+                         mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+                         class_name, 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  for (; *extra; extra++)
+    bitset_set (sbcset, *extra);
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  tree = create_token_tree (dfa, NULL, NULL, &br_token);
+  if (BE (tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+        goto build_word_op_espace;
+      /* Then join them by ALT node.  It is the result of the join that has
+         to be checked here: MBC_TREE is already known to be non-NULL, so
+         testing it would hand a failed join back to the caller as NULL
+         with *ERR unset.  A failed join falls through to the cleanup.  */
+      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+      if (BE (tree != NULL, 1))
+        return tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* This is intended for the expressions like "a{1,3}".
+   Fetch a number from `input', and return the number.
+   Return -1, if the number field is empty like "{,1}".
+   Return -2, if an error occurred.  */
+
+static int
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
+{
+  int num = -1;
+  unsigned char c;
+  while (1)
+    {
+      fetch_token (token, input, syntax);
+      c = token->opr.c;
+      if (BE (token->type == END_OF_RE, 0))
+        return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
+        break;
+      num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
+             ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
+      num = (num > RE_DUP_MAX) ? -2 : num;
+    }
+  return num;
+}
+
+#ifdef RE_ENABLE_I18N
+static void
+free_charset (re_charset_t *cset)
+{
+  re_free (cset->mbchars);
+# ifdef _LIBC
+  re_free (cset->coll_syms);
+  re_free (cset->equiv_classes);
+  re_free (cset->range_starts);
+  re_free (cset->range_ends);
+# endif
+  re_free (cset->char_classes);
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation.  */
+
+/* Create a tree node.
*/
+
+static bin_tree_t *
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+             re_token_type_t type)
+{
+  /* Only the type is filled in here; create_token_tree resets the
+     duplicated and opt_subexp flags on the stored copy.  */
+  re_token_t op_token;
+
+  op_token.type = type;
+  return create_token_tree (dfa, left, right, &op_token);
+}
+
+/* Take the next free slot of DFA's tree storage, initialize it as a node
+   carrying a copy of TOKEN with children LEFT and RIGHT (either may be
+   NULL), and return it.  Returns NULL when a new storage chunk is needed
+   and cannot be allocated.  */
+
+static bin_tree_t *
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+                   const re_token_t *token)
+{
+  bin_tree_t *node;
+
+  /* Current chunk exhausted: push a fresh one on the front of the list.  */
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+    {
+      bin_tree_storage_t *chunk = re_malloc (bin_tree_storage_t, 1);
+
+      if (chunk == NULL)
+        return NULL;
+      chunk->next = dfa->str_tree_storage;
+      dfa->str_tree_storage_idx = 0;
+      dfa->str_tree_storage = chunk;
+    }
+  node = dfa->str_tree_storage->data + dfa->str_tree_storage_idx;
+  dfa->str_tree_storage_idx++;
+
+  /* Copy the token first, then clear the per-node flags on the copy.  */
+  node->token = *token;
+  node->token.duplicated = 0;
+  node->token.opt_subexp = 0;
+
+  node->parent = NULL;
+  node->first = NULL;
+  node->next = NULL;
+  node->node_idx = -1;
+
+  /* Hook up the children and point them back at the new node.  */
+  node->left = left;
+  if (left != NULL)
+    left->parent = node;
+  node->right = right;
+  if (right != NULL)
+    right->parent = node;
+
+  return node;
+}
+
+/* Tree-walk callback: flag NODE as an optional subexpression when it is
+   the SUBEXP node whose index is the integer carried in EXTRA.
+   Always returns REG_NOERROR.  */
+
+static reg_errcode_t
+mark_opt_subexp (void *extra, bin_tree_t *node)
+{
+  const int wanted = (int) (long) extra;
+  re_token_t *tok = &node->token;
+
+  if (tok->type == SUBEXP && tok->opr.idx == wanted)
+    tok->opt_subexp = 1;
+
+  return REG_NOERROR;
+}
+
+/* Release the character set owned by NODE.  Tokens marked as duplicated
+   are left alone.  */
+
+static void
+free_token (re_token_t *node)
+{
+  if (node->duplicated != 0)
+    return;
+
+#ifdef RE_ENABLE_I18N
+  if (node->type == COMPLEX_BRACKET)
+    {
+      free_charset (node->opr.mbcset);
+      return;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (node->type == SIMPLE_BRACKET)
+    re_free (node->opr.sbcset);
+}
+
+/* Tree-walk callback: release what the token of NODE owns.  EXTRA is not
+   used.  Always returns REG_NOERROR.  */
+
+static reg_errcode_t
+free_tree (void *extra, bin_tree_t *node)
+{
+  re_token_t *tok = &node->token;
+
+  free_token (tok);
+  return REG_NOERROR;
+}
+
+
+/* Duplicate the node SRC, and return new node.
This is a preorder + visit similar to the one implemented by the generic visitor, but + we need more infrastructure to maintain two parallel trees --- so, + it's easier to duplicate. */ + +static bin_tree_t * +duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) +{ + const bin_tree_t *node; + bin_tree_t *dup_root; + bin_tree_t **p_new = &dup_root, *dup_node = root->parent; + + for (node = root; ; ) + { + /* Create a new tree and link it back to the current parent. */ + *p_new = create_token_tree (dfa, NULL, NULL, &node->token); + if (*p_new == NULL) + return NULL; + (*p_new)->parent = dup_node; + (*p_new)->token.duplicated = 1; + dup_node = *p_new; + + /* Go to the left node, or up and to the right. */ + if (node->left) + { + node = node->left; + p_new = &dup_node->left; + } + else + { + const bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + dup_node = dup_node->parent; + if (!node) + return dup_root; + } + node = node->right; + p_new = &dup_node->right; + } + } +} + +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ +/* GKINCLUDE #include "regexec.c" */ +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ +static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, + int n) internal_function; +static void match_ctx_clean (re_match_context_t *mctx) internal_function; +static void match_ctx_free (re_match_context_t *cache) internal_function; +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, + int str_idx, int from, int to) + internal_function; +static int 
search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) + internal_function; +static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, + int str_idx) internal_function; +static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, + int node, int str_idx) + internal_function; +static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, + int last_str_idx) + internal_function; +static reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags) internal_function; +static int re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, + int start, int range, struct re_registers *regs, + int stop, int ret_len) internal_function; +static int re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, struct re_registers *regs, + int ret_len) internal_function; +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + int nregs, int regs_allocated) internal_function; +static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) + internal_function; +static int check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) internal_function; +static int check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) + internal_function; +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, + int cur_idx, int nmatch) internal_function; +static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, + int str_idx, int dest_node, int nregs, + regmatch_t *regs, + re_node_set *eps_via_nodes) + internal_function; +static reg_errcode_t set_regs (const regex_t *preg, + const 
re_match_context_t *mctx, + size_t nmatch, regmatch_t *pmatch, + int fl_backtrack) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; + +#ifdef RE_ENABLE_I18N +static int sift_states_iter_mb (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, + re_node_set *dest_nodes) + internal_function; +static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates) + internal_function; +static int check_dst_limits (const re_match_context_t *mctx, + re_node_set *limits, + int dst_node, int dst_idx, int src_node, + int src_idx) internal_function; +static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, + int boundaries, int subexp_idx, + int from_node, int bkref_idx) + internal_function; +static int check_dst_limits_calc_pos (const re_match_context_t *mctx, + int limit, int subexp_idx, + int node, int str_idx, + int bkref_idx) internal_function; +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates, + re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, + int str_idx) internal_function; +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, int 
num) + internal_function; +static re_dfastate_t *find_recover_state (reg_errcode_t *err, + re_match_context_t *mctx) internal_function; +static re_dfastate_t *transit_state (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *state) internal_function; +static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *next_state) + internal_function; +static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, + re_node_set *cur_nodes, + int str_idx) internal_function; +#if 0 +static re_dfastate_t *transit_state_sb (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif +#ifdef RE_ENABLE_I18N +static reg_errcode_t transit_state_mb (re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, + const re_node_set *nodes) + internal_function; +static reg_errcode_t get_subexp (re_match_context_t *mctx, + int bkref_node, int bkref_str_idx) + internal_function; +static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, + const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, + int bkref_node, int bkref_str) + internal_function; +static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) internal_function; +static reg_errcode_t check_arrival (re_match_context_t *mctx, + state_array_t *path, int top_node, + int top_str, int last_node, int last_str, + int type) internal_function; +static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, + int str_idx, + re_node_set *cur_nodes, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, + re_node_set *cur_nodes, + int ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, + re_node_set *dst_nodes, + int 
target, int ex_subexp, + int type) internal_function; +static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, + re_node_set *cur_nodes, int cur_str, + int subexp_num, int type) + internal_function; +static int build_trtable (const re_dfa_t *dfa, + re_dfastate_t *state) internal_function; +#ifdef RE_ENABLE_I18N +static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int idx) + internal_function; +# ifdef _LIBC +static unsigned int find_collation_sequence_value (const unsigned char *mbs, + size_t name_len) + internal_function; +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ +static int group_nodes_into_DFAstates (const re_dfa_t *dfa, + const re_dfastate_t *state, + re_node_set *states_node, + bitset_t *states_ch) internal_function; +static int check_node_accept (const re_match_context_t *mctx, + const re_token_t *node, int idx) + internal_function; +static reg_errcode_t extend_buffers (re_match_context_t *mctx) + internal_function; + +/* Entry point for POSIX code. */ + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. 
*/
+
+int
+regexec (const regex_t *__restrict preg, const char *__restrict string,
+         size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  reg_errcode_t err;
+  int start, length;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+
+  /* Reject any execution flag this entry point does not understand.  */
+  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
+    return REG_BADPAT;
+
+  if (eflags & REG_STARTEND)
+    {
+      /* With REG_STARTEND the caller passes the window to search in
+         PMATCH[0], so PMATCH is read here even when NMATCH is zero.  */
+      start = pmatch[0].rm_so;
+      length = pmatch[0].rm_eo;
+    }
+  else
+    {
+      start = 0;
+      length = strlen (string);
+    }
+
+  __libc_lock_lock (dfa->lock);
+  if (preg->no_sub)
+    /* The pattern was compiled without sub-match reporting: ask for no
+       registers at all.  */
+    err = re_search_internal (preg, string, length, start, length - start,
+                              length, 0, NULL, eflags);
+  else
+    err = re_search_internal (preg, string, length, start, length - start,
+                              length, nmatch, pmatch, eflags);
+  __libc_lock_unlock (dfa->lock);
+  /* Collapse every failure, not only REG_NOMATCH, into a nonzero result.  */
+  return err != REG_NOERROR;
+}
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+/* Compatibility entry point bound to the GLIBC_2_0 symbol version: same as
+   regexec, but only REG_NOTBOL and REG_NOTEOL are passed through.  */
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+                  const char *__restrict string, size_t nmatch,
+                  regmatch_t pmatch[], int eflags)
+{
+  return regexec (preg, string, nmatch, pmatch,
+                  eflags & (REG_NOTBOL | REG_NOTEOL));
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code.  */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+   The former two functions operate on STRING with length LENGTH,
+   while the latter two operate on concatenation of STRING1 and STRING2
+   with lengths LENGTH1 and LENGTH2, respectively.
+
+   re_match() matches the compiled pattern in BUFP against the string,
+   starting at index START.
+
+   re_search() first tries matching at index START, then it tries to match
+   starting from index START + 1, and so on.  The last start position tried
+   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
+   way as re_match().)
+
+   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
+   the first STOP characters of the concatenation of the strings should be
+   concerned.
+
+   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+   and all groups are stored in REGS.  (For the "_2" variants, the offsets are
+   computed relative to the concatenation, not relative to the individual
+   strings.)
+
+   On success, re_match* functions return the length of the match, re_search*
+   return the position of the start of the match.  Return value -1 means no
+   match was found and -2 indicates an internal error.  */
+
+int
+re_match (struct re_pattern_buffer *bufp, const char *string, int length,
+          int start, struct re_registers *regs)
+{
+  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+int
+re_search (struct re_pattern_buffer *bufp, const char *string, int length,
+           int start, int range, struct re_registers *regs)
+{
+  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+int
+re_match_2 (struct re_pattern_buffer *bufp,
+            const char *string1, int length1,
+            const char *string2, int length2,
+            int start, struct re_registers *regs, int stop)
+{
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, 0, regs, stop, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+int
+re_search_2 (struct re_pattern_buffer *bufp,
+             const char *string1, int length1,
+             const char *string2, int length2,
+             int start, int range, struct re_registers *regs, int stop)
+{
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* Common worker of re_match_2 and re_search_2.  Presents STRING1 followed
+   by STRING2 to re_search_stub as a single string; a temporary copy is made
+   only when both parts are non-empty.  Returns what re_search_stub returns,
+   or -2 when a length or STOP is negative or the copy cannot be
+   allocated.  */
+
+static int
+re_search_2_stub (struct re_pattern_buffer *bufp,
+                  const char *string1, int length1,
+                  const char *string2, int length2,
+                  int start, int range, struct re_registers *regs,
+                  int stop, int ret_len)
+{
+  const char *str;
+  int rval;
+  int len;
+  int free_str = 0;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+  len = length1 + length2;
+
+  /* Concatenate the strings.  */
+  if (length2 > 0)
+    {
+      if (length1 > 0)
+        {
+          char *s = re_malloc (char, len);
+
+          if (BE (s == NULL, 0))
+            return -2;
+#ifdef _LIBC
+          memcpy (__mempcpy (s, string1, length1), string2, length2);
+#else
+          memcpy (s, string1, length1);
+          memcpy (s + length1, string2, length2);
+#endif
+          str = s;
+          free_str = 1;
+        }
+      else
+        str = string2;
+    }
+  else
+    str = string1;
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+                         ret_len);
+  if (free_str)
+    re_free ((char *) str);
+  return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+   Additional parameters:
+   If RET_LEN is nonzero the length of the match is returned (re_match style);
+   otherwise the position of the match is returned.  */
+
+static int
+re_search_stub (struct re_pattern_buffer *bufp, const char *string,
+                int length, int start, int range, int stop,
+                struct re_registers *regs, int ret_len)
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  /* Clamp RANGE so that START + RANGE stays inside [0, LENGTH].  */
+  if (BE (start + range > length, 0))
+    range = length - start;
+  else if (BE (start + range < 0, 0))
+    range = -start;
+
+  __libc_lock_lock (dfa->lock);
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+               regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+        {
+          /* Nothing can be copied to regs.  */
+          regs = NULL;
+          nregs = 1;
+        }
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    {
+      rval = -2;
+      goto out;
+    }
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+                               nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill the regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    rval = -1;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+                                           bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+        rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+        {
+          assert (pmatch[0].rm_so == start);
+          rval = pmatch[0].rm_eo - start;
+        }
+      else
+        rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+ out:
+  __libc_lock_unlock (dfa->lock);
+  return rval;
+}
+
+/* Copy the NREGS offsets in PMATCH to REGS, allocating or growing the
+   arrays of REGS as REGS_ALLOCATED (REGS_UNALLOCATED, REGS_REALLOCATE or
+   REGS_FIXED) requires.  Slots of REGS beyond NREGS are set to -1.
+
+   Returns the allocation state the caller should record (REGS_REALLOCATE
+   or REGS_FIXED), or REGS_UNALLOCATED when memory runs out.  On such a
+   failure REGS never holds a freed pointer: a failed initial allocation
+   leaves both pointers NULL, and a failed growth leaves both arrays valid
+   with NUM_REGS unchanged.  */
+
+static unsigned
+re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
+              int regs_allocated)
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  int need_regs = nregs + 1;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+     uses.  */
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  Check each allocation on its
+         own so that the first array is released when the second fails.  */
+      regs->start = re_malloc (regoff_t, need_regs);
+      if (BE (regs->start == NULL, 0))
+        return REGS_UNALLOCATED;
+      regs->end = re_malloc (regoff_t, need_regs);
+      if (BE (regs->end == NULL, 0))
+        {
+          re_free (regs->start);
+          regs->start = NULL;
+          return REGS_UNALLOCATED;
+        }
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+         allocated, reallocate them.  If we need fewer, just
+         leave it alone.  */
+      if (BE (need_regs > regs->num_regs, 0))
+        {
+          regoff_t *new_start;
+          regoff_t *new_end;
+
+          new_start = re_realloc (regs->start, regoff_t, need_regs);
+          if (BE (new_start == NULL, 0))
+            return REGS_UNALLOCATED;
+          /* The old block may have moved; store the new address at once
+             so REGS cannot be left pointing at freed memory if the
+             second reallocation fails.  */
+          regs->start = new_start;
+
+          new_end = re_realloc (regs->end, regoff_t, need_regs);
+          if (BE (new_end == NULL, 0))
+            return REGS_UNALLOCATED;
+          regs->end = new_end;
+          regs->num_regs = need_regs;
+        }
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information.  STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.
*/
+
+void
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
+                  unsigned num_regs, regoff_t *starts, regoff_t *ends)
+{
+  if (num_regs)
+    {
+      /* Adopt the caller's arrays; REGS_REALLOCATE marks them as memory
+         that later matches may grow.  */
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      /* No storage supplied: reset REGS so that the next match allocates
+         its own arrays.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+/* Match S against the pattern held in re_comp_buf.  Returns 1 when regexec
+   reports a match and 0 otherwise.  */
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (const char *s)
+{
+  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
+}
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.  */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
+   meanings with regexec.  START, and RANGE have the same meanings
+   with re_search.
+   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+   otherwise return the error code.
+   Note: We assume front end functions already check ranges.
+ (START + RANGE >= 0 && START + RANGE <= LENGTH) */ + +static reg_errcode_t +re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, + eflags) + const regex_t *preg; + const char *string; + int length, start, range, stop, eflags; + size_t nmatch; + regmatch_t pmatch[]; +{ + reg_errcode_t err; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int left_lim, right_lim, incr; + int fl_longest_match, match_first, match_kind, match_last = -1; + int extra_nmatch; + int sb, ch; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + re_match_context_t mctx = { .dfa = dfa }; +#else + re_match_context_t mctx; +#endif + char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate + && range && !preg->can_be_null) ? preg->fastmap : NULL; + RE_TRANSLATE_TYPE t = preg->translate; + +#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) + memset (&mctx, '\0', sizeof (re_match_context_t)); + mctx.dfa = dfa; +#endif + + extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; + nmatch -= extra_nmatch; + + /* Check if the DFA haven't been compiled. */ + if (BE (preg->used == 0 || dfa->init_state == NULL + || dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return REG_NOMATCH; + +#ifdef DEBUG + /* We assume front-end functions already check them. */ + assert (start + range >= 0 && start + range <= length); +#endif + + /* If initial states with non-begbuf contexts have no elements, + the regex must be anchored. If preg->newline_anchor is set, + we'll never use init_state_nl, so do not check it. */ + if (dfa->init_state->nodes.nelem == 0 + && dfa->init_state_word->nodes.nelem == 0 + && (dfa->init_state_nl->nodes.nelem == 0 + || !preg->newline_anchor)) + { + if (start != 0 && start + range != 0) + return REG_NOMATCH; + start = range = 0; + } + + /* We must check the longest matching, if nmatch > 0. 
*/ + fl_longest_match = (nmatch != 0 || dfa->nbackref); + + err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, + preg->translate, preg->syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + mctx.input.stop = stop; + mctx.input.raw_stop = stop; + mctx.input.newline_anchor = preg->newline_anchor; + + err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* We will log all the DFA states through which the dfa pass, + if nmatch > 1, or this dfa has "multibyte node", which is a + back-reference or a node which can accept multibyte character or + multi character collating element. */ + if (nmatch > 1 || dfa->has_mb_node) + { + mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); + if (BE (mctx.state_log == NULL, 0)) + { + err = REG_ESPACE; + goto free_return; + } + } + else + mctx.state_log = NULL; + + match_first = start; + mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF; + + /* Check incrementally whether of not the input string match. */ + incr = (range < 0) ? -1 : 1; + left_lim = (range < 0) ? start + range : start; + right_lim = (range < 0) ? start : start + range; + sb = dfa->mb_cur_max == 1; + match_kind = + (fastmap + ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) + | (range >= 0 ? 2 : 0) + | (t != NULL ? 1 : 0)) + : 8); + + for (;; match_first += incr) + { + err = REG_NOMATCH; + if (match_first < left_lim || right_lim < match_first) + goto free_return; + + /* Advance as rapidly as possible through the string, until we + find a plausible place to start matching. This may be done + with varying efficiency, so there are various possibilities: + only the most common of them are specialized, in order to + save on code size. We use a switch statement for speed. */ + switch (match_kind) + { + case 8: + /* No fastmap. 
*/ + break; + + case 7: + /* Fastmap with single-byte translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[t[(unsigned char) string[match_first]]]) + ++match_first; + goto forward_match_found_start_or_reached_end; + + case 6: + /* Fastmap without translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[(unsigned char) string[match_first]]) + ++match_first; + + forward_match_found_start_or_reached_end: + if (BE (match_first == right_lim, 0)) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (!fastmap[t ? t[ch] : ch]) + goto free_return; + } + break; + + case 4: + case 5: + /* Fastmap without multi-byte translation, match backwards. */ + while (match_first >= left_lim) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (fastmap[t ? t[ch] : ch]) + break; + --match_first; + } + if (match_first < left_lim) + goto free_return; + break; + + default: + /* In this case, we can't determine easily the current byte, + since it might be a component byte of a multibyte + character. Then we use the constructed buffer instead. */ + for (;;) + { + /* If MATCH_FIRST is out of the valid range, reconstruct the + buffers. */ + unsigned int offset = match_first - mctx.input.raw_mbs_idx; + if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) + { + err = re_string_reconstruct (&mctx.input, match_first, + eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + offset = match_first - mctx.input.raw_mbs_idx; + } + /* If MATCH_FIRST is out of the buffer, leave it as '\0'. + Note that MATCH_FIRST must not be smaller than 0. */ + ch = (match_first >= length + ? 
0 : re_string_byte_at (&mctx.input, offset)); + if (fastmap[ch]) + break; + match_first += incr; + if (match_first < left_lim || match_first > right_lim) + { + err = REG_NOMATCH; + goto free_return; + } + } + break; + } + + /* Reconstruct the buffers so that the matcher can assume that + the matching starts from the beginning of the buffer. */ + err = re_string_reconstruct (&mctx.input, match_first, eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + +#ifdef RE_ENABLE_I18N + /* Don't consider this char as a possible match start if it part, + yet isn't the head, of a multibyte character. */ + if (!sb && !re_string_first_byte (&mctx.input, 0)) + continue; +#endif + + /* It seems to be appropriate one, then use the matcher. */ + /* We assume that the matching starts from 0. */ + mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; + match_last = check_matching (&mctx, fl_longest_match, + range >= 0 ? &match_first : NULL); + if (match_last != -1) + { + if (BE (match_last == -2, 0)) + { + err = REG_ESPACE; + goto free_return; + } + else + { + mctx.match_last = match_last; + if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) + { + re_dfastate_t *pstate = mctx.state_log[match_last]; + mctx.last_node = check_halt_state_context (&mctx, pstate, + match_last); + } + if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) + || dfa->nbackref) + { + err = prune_impossible_nodes (&mctx); + if (err == REG_NOERROR) + break; + if (BE (err != REG_NOMATCH, 0)) + goto free_return; + match_last = -1; + } + else + break; /* We found a match. */ + } + } + + match_ctx_clean (&mctx); + } + +#ifdef DEBUG + assert (match_last != -1); + assert (err == REG_NOERROR); +#endif + + /* Set pmatch[] if we need. */ + if (nmatch > 0) + { + int reg_idx; + + /* Initialize registers. */ + for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) + pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; + + /* Set the points where matching start/end. 
*/ + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = mctx.match_last; + + if (!preg->no_sub && nmatch > 1) + { + err = set_regs (preg, &mctx, nmatch, pmatch, + dfa->has_plural_match && dfa->nbackref > 0); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* At last, add the offset to the each registers, since we slided + the buffers so that we could assume that the matching starts + from 0. */ + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so != -1) + { +#ifdef RE_ENABLE_I18N + if (BE (mctx.input.offsets_needed != 0, 0)) + { + pmatch[reg_idx].rm_so = + (pmatch[reg_idx].rm_so == mctx.input.valid_len + ? mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_so]); + pmatch[reg_idx].rm_eo = + (pmatch[reg_idx].rm_eo == mctx.input.valid_len + ? mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_eo]); + } +#else + assert (mctx.input.offsets_needed == 0); +#endif + pmatch[reg_idx].rm_so += match_first; + pmatch[reg_idx].rm_eo += match_first; + } + for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) + { + pmatch[nmatch + reg_idx].rm_so = -1; + pmatch[nmatch + reg_idx].rm_eo = -1; + } + + if (dfa->subexp_map) + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } + } + + free_return: + re_free (mctx.state_log); + if (dfa->nbackref) + match_ctx_free (&mctx); + re_string_destruct (&mctx.input); + return err; +} + +static reg_errcode_t +prune_impossible_nodes (mctx) + re_match_context_t *mctx; +{ + const re_dfa_t *const dfa = mctx->dfa; + int halt_node, match_last; + reg_errcode_t ret; + re_dfastate_t **sifted_states; + re_dfastate_t **lim_states = NULL; + re_sift_context_t sctx; +#ifdef DEBUG + assert (mctx->state_log != NULL); +#endif + match_last = mctx->match_last; + halt_node = mctx->last_node; + sifted_states = 
re_malloc (re_dfastate_t *, match_last + 1); + if (BE (sifted_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + if (dfa->nbackref) + { + lim_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (lim_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + while (1) + { + memset (lim_states, '\0', + sizeof (re_dfastate_t *) * (match_last + 1)); + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, + match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] != NULL || lim_states[0] != NULL) + break; + do + { + --match_last; + if (match_last < 0) + { + ret = REG_NOMATCH; + goto free_return; + } + } while (mctx->state_log[match_last] == NULL + || !mctx->state_log[match_last]->halt); + halt_node = check_halt_state_context (mctx, + mctx->state_log[match_last], + match_last); + } + ret = merge_state_array (dfa, sifted_states, lim_states, + match_last + 1); + re_free (lim_states); + lim_states = NULL; + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + } + else + { + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + } + re_free (mctx->state_log); + mctx->state_log = sifted_states; + sifted_states = NULL; + mctx->last_node = halt_node; + mctx->match_last = match_last; + ret = REG_NOERROR; + free_return: + re_free (sifted_states); + re_free (lim_states); + return ret; +} + +/* Acquire an initial state and return it. + We must select appropriate initial state depending on the context, + since initial states may have constraints like "\<", "^", etc.. 
*/ + +static inline re_dfastate_t * +__attribute ((always_inline)) internal_function +acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, + int idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + if (dfa->init_state->has_constraint) + { + unsigned int context; + context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return dfa->init_state_word; + else if (IS_ORDINARY_CONTEXT (context)) + return dfa->init_state; + else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_begbuf; + else if (IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_nl; + else if (IS_BEGBUF_CONTEXT (context)) + { + /* It is relatively rare case, then calculate on demand. */ + return re_acquire_state_context (err, dfa, + dfa->init_state->entrance_nodes, + context); + } + else + /* Must not happen? */ + return dfa->init_state; + } + else + return dfa->init_state; +} + +/* Check whether the regular expression match input string INPUT or not, + and return the index where the matching end, return -1 if not match, + or return -2 in case of an error. + FL_LONGEST_MATCH means we want the POSIX longest matching. + If P_MATCH_FIRST is not NULL, and the match fails, it is set to the + next place where we may want to try matching. + Note that the matcher assume that the maching starts from the current + index of the buffer. */ + +static int +internal_function +check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int match = 0; + int match_last = -1; + int cur_str_idx = re_string_cur_idx (&mctx->input); + re_dfastate_t *cur_state; + int at_init_state = p_match_first != NULL; + int next_start_idx = cur_str_idx; + + err = REG_NOERROR; + cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); + /* An initial state must not be NULL (invalid). 
*/ + if (BE (cur_state == NULL, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + + if (mctx->state_log != NULL) + { + mctx->state_log[cur_str_idx] = cur_state; + + /* Check OP_OPEN_SUBEXP in the initial state in case that we use them + later. E.g. Processing back references. */ + if (BE (dfa->nbackref, 0)) + { + at_init_state = 0; + err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (cur_state->has_backref) + { + err = transit_state_bkref (mctx, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + + /* If the RE accepts NULL string. */ + if (BE (cur_state->halt, 0)) + { + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, cur_str_idx)) + { + if (!fl_longest_match) + return cur_str_idx; + else + { + match_last = cur_str_idx; + match = 1; + } + } + } + + while (!re_string_eoi (&mctx->input)) + { + re_dfastate_t *old_state = cur_state; + int next_char_idx = re_string_cur_idx (&mctx->input) + 1; + + if (BE (next_char_idx >= mctx->input.bufs_len, 0) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + } + + cur_state = transit_state (&err, mctx, cur_state); + if (mctx->state_log != NULL) + cur_state = merge_state_with_log (&err, mctx, cur_state); + + if (cur_state == NULL) + { + /* Reached the invalid state or an error. Try to recover a valid + state using the state log, if available and if we have not + already found a valid (even if not the longest) match. 
*/ + if (BE (err != REG_NOERROR, 0)) + return -2; + + if (mctx->state_log == NULL + || (match && !fl_longest_match) + || (cur_state = find_recover_state (&err, mctx)) == NULL) + break; + } + + if (BE (at_init_state, 0)) + { + if (old_state == cur_state) + next_start_idx = next_char_idx; + else + at_init_state = 0; + } + + if (cur_state->halt) + { + /* Reached a halt state. + Check the halt state can satisfy the current context. */ + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, + re_string_cur_idx (&mctx->input))) + { + /* We found an appropriate halt state. */ + match_last = re_string_cur_idx (&mctx->input); + match = 1; + + /* We found a match, do not modify match_first below. */ + p_match_first = NULL; + if (!fl_longest_match) + break; + } + } + } + + if (p_match_first) + *p_match_first += next_start_idx; + + return match_last; +} + +/* Check NODE match the current context. */ + +static int +internal_function +check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) +{ + re_token_type_t type = dfa->nodes[node].type; + unsigned int constraint = dfa->nodes[node].constraint; + if (type != END_OF_RE) + return 0; + if (!constraint) + return 1; + if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) + return 0; + return 1; +} + +/* Check the halt state STATE match the current context. + Return 0 if not match, if the node, STATE has, is a halt node and + match the context, return the node. 
*/ + +static int +internal_function +check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) +{ + int i; + unsigned int context; +#ifdef DEBUG + assert (state->halt); +#endif + context = re_string_context_at (&mctx->input, idx, mctx->eflags); + for (i = 0; i < state->nodes.nelem; ++i) + if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) + return state->nodes.elems[i]; + return 0; +} + +/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA + corresponding to the DFA). + Return the destination node, and update EPS_VIA_NODES, return -1 in case + of errors. */ + +static int +internal_function +proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, + int *pidx, int node, re_node_set *eps_via_nodes, + struct re_fail_stack_t *fs) +{ + const re_dfa_t *const dfa = mctx->dfa; + int i, err; + if (IS_EPSILON_NODE (dfa->nodes[node].type)) + { + re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; + re_node_set *edests = &dfa->edests[node]; + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + /* Pick up a valid destination, or return -1 if none is found. */ + for (dest_node = -1, i = 0; i < edests->nelem; ++i) + { + int candidate = edests->elems[i]; + if (!re_node_set_contains (cur_nodes, candidate)) + continue; + if (dest_node == -1) + dest_node = candidate; + + else + { + /* In order to avoid infinite loop like "(a*)*", return the second + epsilon-transition if the first was already considered. */ + if (re_node_set_contains (eps_via_nodes, dest_node)) + return candidate; + + /* Otherwise, push the second epsilon-transition on the fail stack. */ + else if (fs != NULL + && push_fail_stack (fs, *pidx, candidate, nregs, regs, + eps_via_nodes)) + return -2; + + /* We know we are going to exit. 
*/ + break; + } + } + return dest_node; + } + else + { + int naccepted = 0; + re_token_type_t type = dfa->nodes[node].type; + +#ifdef RE_ENABLE_I18N + if (dfa->nodes[node].accept_mb) + naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); + else +#endif /* RE_ENABLE_I18N */ + if (type == OP_BACK_REF) + { + int subexp_idx = dfa->nodes[node].opr.idx + 1; + naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; + if (fs != NULL) + { + if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) + return -1; + else if (naccepted) + { + char *buf = (char *) re_string_get_buffer (&mctx->input); + if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, + naccepted) != 0) + return -1; + } + } + + if (naccepted == 0) + { + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + dest_node = dfa->edests[node].elems[0]; + if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node)) + return dest_node; + } + } + + if (naccepted != 0 + || check_node_accept (mctx, dfa->nodes + node, *pidx)) + { + int dest_node = dfa->nexts[node]; + *pidx = (naccepted == 0) ? 
*pidx + 1 : *pidx + naccepted; + if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL + || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node))) + return -1; + re_node_set_empty (eps_via_nodes); + return dest_node; + } + } + return -1; +} + +static reg_errcode_t +internal_function +push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, + int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) +{ + reg_errcode_t err; + int num = fs->num++; + if (fs->num == fs->alloc) + { + struct re_fail_stack_ent_t *new_array; + new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) + * fs->alloc * 2)); + if (new_array == NULL) + return REG_ESPACE; + fs->alloc *= 2; + fs->stack = new_array; + } + fs->stack[num].idx = str_idx; + fs->stack[num].node = dest_node; + fs->stack[num].regs = re_malloc (regmatch_t, nregs); + if (fs->stack[num].regs == NULL) + return REG_ESPACE; + memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); + err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); + return err; +} + +static int +internal_function +pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, + regmatch_t *regs, re_node_set *eps_via_nodes) +{ + int num = --fs->num; + assert (num >= 0); + *pidx = fs->stack[num].idx; + memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); + re_node_set_free (eps_via_nodes); + re_free (fs->stack[num].regs); + *eps_via_nodes = fs->stack[num].eps_via_nodes; + return fs->stack[num].node; +} + +/* Set the positions where the subexpressions are starts/ends to registers + PMATCH. + Note: We assume that pmatch[0] is already set, and + pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. 
*/ + +static reg_errcode_t +internal_function +set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, + regmatch_t *pmatch, int fl_backtrack) +{ + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int idx, cur_node; + re_node_set eps_via_nodes; + struct re_fail_stack_t *fs; + struct re_fail_stack_t fs_body = { 0, 2, NULL }; + regmatch_t *prev_idx_match; + int prev_idx_match_malloced = 0; + +#ifdef DEBUG + assert (nmatch > 1); + assert (mctx->state_log != NULL); +#endif + if (fl_backtrack) + { + fs = &fs_body; + fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); + if (fs->stack == NULL) + return REG_ESPACE; + } + else + fs = NULL; + + cur_node = dfa->init_node; + re_node_set_init_empty (&eps_via_nodes); + + if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) + prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); + else + { + prev_idx_match = re_malloc (regmatch_t, nmatch); + if (prev_idx_match == NULL) + { + free_fail_stack_return (fs); + return REG_ESPACE; + } + prev_idx_match_malloced = 1; + } + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + + for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) + { + update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); + + if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) + { + int reg_idx; + if (fs) + { + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) + break; + if (reg_idx == nmatch) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); + } + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + } + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOERROR; + } + } + + /* Proceed to next node. 
*/ + cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, + &eps_via_nodes, fs); + + if (BE (cur_node < 0, 0)) + { + if (BE (cur_node == -2, 0)) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + free_fail_stack_return (fs); + return REG_ESPACE; + } + if (fs) + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOMATCH; + } + } + } + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); +} + +static reg_errcode_t +internal_function +free_fail_stack_return (struct re_fail_stack_t *fs) +{ + if (fs) + { + int fs_idx; + for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) + { + re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); + re_free (fs->stack[fs_idx].regs); + } + re_free (fs->stack); + } + return REG_NOERROR; +} + +static void +internal_function +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) +{ + int type = dfa->nodes[cur_node].type; + if (type == OP_OPEN_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + + /* We are at the first node of this sub expression. */ + if (reg_num < nmatch) + { + pmatch[reg_num].rm_so = cur_idx; + pmatch[reg_num].rm_eo = -1; + } + } + else if (type == OP_CLOSE_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + if (reg_num < nmatch) + { + /* We are at the last node of this sub expression. */ + if (pmatch[reg_num].rm_so < cur_idx) + { + pmatch[reg_num].rm_eo = cur_idx; + /* This is a non-empty match or we are not inside an optional + subexpression. Accept this right away. 
*/ + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + } + else + { + if (dfa->nodes[cur_node].opt_subexp + && prev_idx_match[reg_num].rm_so != -1) + /* We transited through an empty match for an optional + subexpression, like (a?)*, and this is not the subexp's + first match. Copy back the old content of the registers + so that matches of an inner subexpression are undone as + well, like in ((a?))*. */ + memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); + else + /* We completed a subexpression, but it may be part of + an optional one, so do not update PREV_IDX_MATCH. */ + pmatch[reg_num].rm_eo = cur_idx; + } + } + } +} + +/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 + and sift the nodes in each states according to the following rules. + Updated state_log will be wrote to STATE_LOG. + + Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + 1. When STR_IDX == MATCH_LAST(the last index in the state_log): + If `a' isn't the LAST_NODE and `a' can't epsilon transit to + the LAST_NODE, we throw away the node `a'. + 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts + string `s' and transit to `b': + i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw + away the node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is + thrown away, we throw away the node `a'. + 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': + i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the + node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, + we throw away the node `a'. 
*/ + +#define STATE_NODE_CONTAINS(state,node) \ + ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) + +static reg_errcode_t +internal_function +sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) +{ + reg_errcode_t err; + int null_cnt = 0; + int str_idx = sctx->last_str_idx; + re_node_set cur_dest; + +#ifdef DEBUG + assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); +#endif + + /* Build sifted state_log[str_idx]. It has the nodes which can epsilon + transit to the last_node and the last_node itself. */ + err = re_node_set_init_1 (&cur_dest, sctx->last_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* Then check each states in the state_log. */ + while (str_idx > 0) + { + /* Update counters. */ + null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; + if (null_cnt > mctx->max_mb_elem_len) + { + memset (sctx->sifted_states, '\0', + sizeof (re_dfastate_t *) * str_idx); + re_node_set_free (&cur_dest); + return REG_NOERROR; + } + re_node_set_empty (&cur_dest); + --str_idx; + + if (mctx->state_log[str_idx]) + { + err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* Add all the nodes which satisfy the following conditions: + - It can epsilon transit to a node in CUR_DEST. + - It is in CUR_SRC. + And update state_log. 
*/ + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + err = REG_NOERROR; + free_return: + re_node_set_free (&cur_dest); + return err; +} + +static reg_errcode_t +internal_function +build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, re_node_set *cur_dest) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; + int i; + + /* Then build the next sifted state. + We build the next sifted state on `cur_dest', and update + `sifted_states[str_idx]' with `cur_dest'. + Note: + `cur_dest' is the sifted state from `state_log[str_idx + 1]'. + `cur_src' points the node_set of the old `state_log[str_idx]' + (with the epsilon nodes pre-filtered out). */ + for (i = 0; i < cur_src->nelem; i++) + { + int prev_node = cur_src->elems[i]; + int naccepted = 0; + int ret; + +#ifdef DEBUG + re_token_type_t type = dfa->nodes[prev_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[prev_node].accept_mb) + naccepted = sift_states_iter_mb (mctx, sctx, prev_node, + str_idx, sctx->last_str_idx); +#endif /* RE_ENABLE_I18N */ + + /* We don't check backreferences here. + See update_cur_sifted_state(). */ + if (!naccepted + && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) + && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], + dfa->nexts[prev_node])) + naccepted = 1; + + if (naccepted == 0) + continue; + + if (sctx->limits.nelem) + { + int to_idx = str_idx + naccepted; + if (check_dst_limits (mctx, &sctx->limits, + dfa->nexts[prev_node], to_idx, + prev_node, str_idx)) + continue; + } + ret = re_node_set_insert (cur_dest, prev_node); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + + return REG_NOERROR; +} + +/* Helper functions. 
*/ + +static reg_errcode_t +internal_function +clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) +{ + int top = mctx->state_log_top; + + if (next_state_log_idx >= mctx->input.bufs_len + || (next_state_log_idx >= mctx->input.valid_len + && mctx->input.valid_len < mctx->input.len)) + { + reg_errcode_t err; + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (top < next_state_log_idx) + { + memset (mctx->state_log + top + 1, '\0', + sizeof (re_dfastate_t *) * (next_state_log_idx - top)); + mctx->state_log_top = next_state_log_idx; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, int num) +{ + int st_idx; + reg_errcode_t err; + for (st_idx = 0; st_idx < num; ++st_idx) + { + if (dst[st_idx] == NULL) + dst[st_idx] = src[st_idx]; + else if (src[st_idx] != NULL) + { + re_node_set merged_set; + err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, + &src[st_idx]->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); + re_node_set_free (&merged_set); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *dest_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + const re_node_set *candidates; + candidates = ((mctx->state_log[str_idx] == NULL) ? NULL + : &mctx->state_log[str_idx]->nodes); + + if (dest_nodes->nelem == 0) + sctx->sifted_states[str_idx] = NULL; + else + { + if (candidates) + { + /* At first, add the nodes which can epsilon transit to a node in + DEST_NODE. 
*/ + err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Then, check the limitations in the current sift_context. */ + if (sctx->limits.nelem) + { + err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, + mctx->bkref_ents, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + + sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (candidates && mctx->state_log[str_idx]->has_backref) + { + err = sift_states_bkref (mctx, sctx, str_idx, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + reg_errcode_t err = REG_NOERROR; + int i; + + re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (!state->inveclosure.alloc) + { + err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + for (i = 0; i < dest_nodes->nelem; i++) + re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); + } + return re_node_set_add_intersect (dest_nodes, candidates, + &state->inveclosure); +} + +static reg_errcode_t +internal_function +sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + int ecl_idx; + reg_errcode_t err; + re_node_set *inv_eclosure = dfa->inveclosures + node; + re_node_set except_nodes; + re_node_set_init_empty (&except_nodes); + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (cur_node == node) + continue; + if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) + { + int edst1 = dfa->edests[cur_node].elems[0]; + int edst2 = 
((dfa->edests[cur_node].nelem > 1) + ? dfa->edests[cur_node].elems[1] : -1); + if ((!re_node_set_contains (inv_eclosure, edst1) + && re_node_set_contains (dest_nodes, edst1)) + || (edst2 > 0 + && !re_node_set_contains (inv_eclosure, edst2) + && re_node_set_contains (dest_nodes, edst2))) + { + err = re_node_set_add_intersect (&except_nodes, candidates, + dfa->inveclosures + cur_node); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&except_nodes); + return err; + } + } + } + } + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (!re_node_set_contains (&except_nodes, cur_node)) + { + int idx = re_node_set_contains (dest_nodes, cur_node) - 1; + re_node_set_remove_at (dest_nodes, idx); + } + } + re_node_set_free (&except_nodes); + return REG_NOERROR; +} + +static int +internal_function +check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, + int dst_node, int dst_idx, int src_node, int src_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int lim_idx, src_pos, dst_pos; + + int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); + int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = mctx->bkref_ents + limits->elems[lim_idx]; + subexp_idx = dfa->nodes[ent->node].opr.idx; + + dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, dst_node, dst_idx, + dst_bkref_idx); + src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, src_node, src_idx, + src_bkref_idx); + + /* In case of: + ( ) + ( ) + ( ) */ + if (src_pos == dst_pos) + continue; /* This is unrelated limitation. 
*/ + else + return 1; + } + return 0; +} + +static int +internal_function +check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, + int subexp_idx, int from_node, int bkref_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *eclosures = dfa->eclosures + from_node; + int node_idx; + + /* Else, we are on the boundary: examine the nodes on the epsilon + closure. */ + for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) + { + int node = eclosures->elems[node_idx]; + switch (dfa->nodes[node].type) + { + case OP_BACK_REF: + if (bkref_idx != -1) + { + struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; + do + { + int dst, cpos; + + if (ent->node != node) + continue; + + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) + continue; + + /* Recurse trying to reach the OP_OPEN_SUBEXP and + OP_CLOSE_SUBEXP cases below. But, if the + destination node is the same node as the source + node, don't recurse because it would cause an + infinite loop: a regex that exhibits this behavior + is ()\1*\1* */ + dst = dfa->edests[node].elems[0]; + if (dst == from_node) + { + if (boundaries & 1) + return -1; + else /* if (boundaries & 2) */ + return 0; + } + + cpos = + check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + dst, bkref_idx); + if (cpos == -1 /* && (boundaries & 1) */) + return -1; + if (cpos == 0 && (boundaries & 2)) + return 0; + + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); + } + while (ent++->more); + } + break; + + case OP_OPEN_SUBEXP: + if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) + return -1; + break; + + case OP_CLOSE_SUBEXP: + if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) + return 0; + break; + + default: + break; + } + } + + return (boundaries & 2) ? 
1 : 0; +} + +static int +internal_function +check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, + int subexp_idx, int from_node, int str_idx, + int bkref_idx) +{ + struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; + int boundaries; + + /* If we are outside the range of the subexpression, return -1 or 1. */ + if (str_idx < lim->subexp_from) + return -1; + + if (lim->subexp_to < str_idx) + return 1; + + /* If we are within the subexpression, return 0. */ + boundaries = (str_idx == lim->subexp_from); + boundaries |= (str_idx == lim->subexp_to) << 1; + if (boundaries == 0) + return 0; + + /* Else, examine epsilon closure. */ + return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + from_node, bkref_idx); +} + +/* Check the limitations of sub expressions LIMITS, and remove the nodes + which are against limitations from DEST_NODES. */ + +static reg_errcode_t +internal_function +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates, re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, int str_idx) +{ + reg_errcode_t err; + int node_idx, lim_idx; + + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = bkref_ents + limits->elems[lim_idx]; + + if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) + continue; /* This is unrelated limitation. */ + + subexp_idx = dfa->nodes[ent->node].opr.idx; + if (ent->subexp_to == str_idx) + { + int ops_node = -1; + int cls_node = -1; + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_OPEN_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + ops_node = node; + else if (type == OP_CLOSE_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + cls_node = node; + } + + /* Check the limitation of the open subexpression. 
*/ + /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ + if (ops_node >= 0) + { + err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Check the limitation of the close subexpression. */ + if (cls_node >= 0) + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + if (!re_node_set_contains (dfa->inveclosures + node, + cls_node) + && !re_node_set_contains (dfa->eclosures + node, + cls_node)) + { + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + --node_idx; + } + } + } + else /* (ent->subexp_to != str_idx) */ + { + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) + { + if (subexp_idx != dfa->nodes[node].opr.idx) + continue; + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int node_idx, node; + re_sift_context_t local_sctx; + int first_idx = search_cur_bkref_entry (mctx, str_idx); + + if (first_idx == -1) + return REG_NOERROR; + + local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. 
*/ + + for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) + { + int enabled_idx; + re_token_type_t type; + struct re_backref_cache_entry *entry; + node = candidates->elems[node_idx]; + type = dfa->nodes[node].type; + /* Avoid infinite loop for the REs like "()\1+". */ + if (node == sctx->last_node && str_idx == sctx->last_str_idx) + continue; + if (type != OP_BACK_REF) + continue; + + entry = mctx->bkref_ents + first_idx; + enabled_idx = first_idx; + do + { + int subexp_len; + int to_idx; + int dst_node; + int ret; + re_dfastate_t *cur_state; + + if (entry->node != node) + continue; + subexp_len = entry->subexp_to - entry->subexp_from; + to_idx = str_idx + subexp_len; + dst_node = (subexp_len ? dfa->nexts[node] + : dfa->edests[node].elems[0]); + + if (to_idx > sctx->last_str_idx + || sctx->sifted_states[to_idx] == NULL + || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) + || check_dst_limits (mctx, &sctx->limits, node, + str_idx, dst_node, to_idx)) + continue; + + if (local_sctx.sifted_states == NULL) + { + local_sctx = *sctx; + err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.last_node = node; + local_sctx.last_str_idx = str_idx; + ret = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (ret < 0, 0)) + { + err = REG_ESPACE; + goto free_return; + } + cur_state = local_sctx.sifted_states[str_idx]; + err = sift_states_backward (mctx, &local_sctx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + if (sctx->limited_states != NULL) + { + err = merge_state_array (dfa, sctx->limited_states, + local_sctx.sifted_states, + str_idx + 1); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.sifted_states[str_idx] = cur_state; + re_node_set_remove (&local_sctx.limits, enabled_idx); + + /* mctx->bkref_ents may have changed, reload the pointer. 
*/ + entry = mctx->bkref_ents + enabled_idx; + } + while (enabled_idx++, entry++->more); + } + err = REG_NOERROR; + free_return: + if (local_sctx.sifted_states != NULL) + { + re_node_set_free (&local_sctx.limits); + } + + return err; +} + + /* Report how many bytes NODE_IDX accepts at STR_IDX according to check_node_accept_bytes. The count is forced to 0 when it is positive, STR_IDX plus the count does not exceed MAX_STR_IDX, and the sifted state at that index does not contain dfa->nexts[node_idx]. */ +#ifdef RE_ENABLE_I18N +static int +internal_function +sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int naccepted; + /* Ask how many bytes the node accepts as a `multi byte'. */ + naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); + if (naccepted > 0 && str_idx + naccepted <= max_str_idx && + !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], + dfa->nexts[node_idx])) + /* The node accepts the `multi byte' here, but the + destination state does not contain its successor, so the node + couldn't accept the current input `multi byte'. */ + naccepted = 0; + /* Otherwise, it is sure that the node could accept + `naccepted' bytes input. */ + return naccepted; +} +#endif /* RE_ENABLE_I18N */ + + +/* Functions for state transition. */ + +/* Return the next state to which the current state STATE will transit by + accepting the current input byte, and update STATE_LOG if necessary. + If STATE can accept a multibyte char/collating element/back reference + update the destination of STATE_LOG. */ + +static re_dfastate_t * +internal_function +transit_state (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + re_dfastate_t **trtable; + unsigned char ch; + +#ifdef RE_ENABLE_I18N + /* If the current state can accept multibyte. */ + if (BE (state->accept_mb, 0)) + { + *err = transit_state_mb (mctx, state); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } +#endif /* RE_ENABLE_I18N */ + + /* Then decide the next state with the single byte.
*/ +#if 0 + if (0) + /* don't use transition table */ + return transit_state_sb (err, mctx, state); +#endif + + /* Use transition table */ + ch = re_string_fetch_byte (&mctx->input); + for (;;) + { + trtable = state->trtable; + if (BE (trtable != NULL, 1)) + return trtable[ch]; + + trtable = state->word_trtable; + if (BE (trtable != NULL, 1)) + { + unsigned int context; + context + = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return trtable[ch + SBC_MAX]; + else + return trtable[ch]; + } + + if (!build_trtable (mctx->dfa, state)) + { + *err = REG_ESPACE; + return NULL; + } + + /* Retry, we now have a transition table. */ + } +} + +/* Update the state_log if we need */ +re_dfastate_t * +internal_function +merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *next_state) +{ + const re_dfa_t *const dfa = mctx->dfa; + int cur_idx = re_string_cur_idx (&mctx->input); + + if (cur_idx > mctx->state_log_top) + { + mctx->state_log[cur_idx] = next_state; + mctx->state_log_top = cur_idx; + } + else if (mctx->state_log[cur_idx] == 0) + { + mctx->state_log[cur_idx] = next_state; + } + else + { + re_dfastate_t *pstate; + unsigned int context; + re_node_set next_nodes, *log_nodes, *table_nodes = NULL; + /* If (state_log[cur_idx] != 0), it implies that cur_idx is + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ + pstate = mctx->state_log[cur_idx]; + log_nodes = pstate->entrance_nodes; + if (next_state != NULL) + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, + log_nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + else + next_nodes = *log_nodes; + /* Note: We already add the nodes of the initial state, + then we don't need to add them here. 
*/ + + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + next_state = mctx->state_log[cur_idx] + = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + + if (table_nodes != NULL) + re_node_set_free (&next_nodes); + } + + if (BE (dfa->nbackref, 0) && next_state != NULL) + { + /* Check OP_OPEN_SUBEXP in the current state in case that we use them + later. We must check them here, since the back references in the + next state might use them. */ + *err = check_subexp_matching_top (mctx, &next_state->nodes, + cur_idx); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + + /* If the next state has back references. */ + if (next_state->has_backref) + { + *err = transit_state_bkref (mctx, &next_state->nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + next_state = mctx->state_log[cur_idx]; + } + } + + return next_state; +} + +/* Skip bytes in the input that correspond to part of a + multi-byte match, then look in the log for a state + from which to restart matching. Returns NULL once the scan index passes mctx->state_log_top; otherwise returns what merge_state_with_log yields, retrying while that is NULL and *ERR is still REG_NOERROR. */ +re_dfastate_t * +internal_function +find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) +{ + re_dfastate_t *cur_state; + do + { + int max = mctx->state_log_top; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + do + { + if (++cur_str_idx > max) + return NULL; /* Past state_log_top: no logged state is left to try. */ + re_string_skip_bytes (&mctx->input, 1); + } + while (mctx->state_log[cur_str_idx] == NULL); /* Advance one byte at a time until a logged state is found. */ + + cur_state = merge_state_with_log (err, mctx, NULL); + } + while (*err == REG_NOERROR && cur_state == NULL); + return cur_state; +} + +/* Helper functions for transit_state. */ + +/* From the node set CUR_NODES, pick up the nodes whose types are + OP_OPEN_SUBEXP and which have corresponding back references in the regular + expression. And register them to use them later for evaluating the + corresponding back references.
*/ + /* Calls match_ctx_add_subtop (mctx, node, str_idx) for every qualifying node of CUR_NODES. Returns the first error that call reports, or REG_NOERROR. */ +static reg_errcode_t +internal_function +check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, + int str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int node_idx; + reg_errcode_t err; + + /* TODO: This isn't efficient. + Because there might be more than one node whose type is + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) + { + int node = cur_nodes->elems[node_idx]; + if (dfa->nodes[node].type == OP_OPEN_SUBEXP + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS /* used_bkref_map is tested with a single-word shift, so only indices below BITSET_WORD_BITS are considered. */ + && (dfa->used_bkref_map + & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) + { + err = match_ctx_add_subtop (mctx, node, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +#if 0 +/* Return the next state to which the current state STATE will transit by + accepting the current input byte. */ + +static re_dfastate_t * +transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + const re_dfa_t *const dfa = mctx->dfa; + re_node_set next_nodes; + re_dfastate_t *next_state; + int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); + unsigned int context; + + *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) + { + int cur_node = state->nodes.elems[node_cnt]; + if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) + { + *err = re_node_set_merge (&next_nodes, + dfa->eclosures + dfa->nexts[cur_node]); + if (BE (*err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return NULL; + } + } + } + context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); + next_state = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR
is already set. */ + + re_node_set_free (&next_nodes); + re_string_skip_bytes (&mctx->input, 1); + return next_state; +} +#endif + +#ifdef RE_ENABLE_I18N +static reg_errcode_t +internal_function +transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + + for (i = 0; i < pstate->nodes.nelem; ++i) + { + re_node_set dest_nodes, *new_nodes; + int cur_node_idx = pstate->nodes.elems[i]; + int naccepted, dest_idx; + unsigned int context; + re_dfastate_t *dest_state; + + if (!dfa->nodes[cur_node_idx].accept_mb) + continue; + + if (dfa->nodes[cur_node_idx].constraint) + { + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input), + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, + context)) + continue; + } + + /* How many bytes the node can accept? */ + naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, + re_string_cur_idx (&mctx->input)); + if (naccepted == 0) + continue; + + /* The node can accepts `naccepted' bytes. */ + dest_idx = re_string_cur_idx (&mctx->input) + naccepted; + mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? 
naccepted + : mctx->max_mb_elem_len); + err = clean_state_log_if_needed (mctx, dest_idx); + if (BE (err != REG_NOERROR, 0)) + return err; +#ifdef DEBUG + assert (dfa->nexts[cur_node_idx] != -1); +#endif + new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; + + dest_state = mctx->state_log[dest_idx]; + if (dest_state == NULL) + dest_nodes = *new_nodes; + else + { + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, new_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + context = re_string_context_at (&mctx->input, dest_idx - 1, + mctx->eflags); + mctx->state_log[dest_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + if (dest_state != NULL) + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} +#endif /* RE_ENABLE_I18N */ + +static reg_errcode_t +internal_function +transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + for (i = 0; i < nodes->nelem; ++i) + { + int dest_str_idx, prev_nelem, bkc_idx; + int node_idx = nodes->elems[i]; + unsigned int context; + const re_token_t *node = dfa->nodes + node_idx; + re_node_set *new_dest_nodes; + + /* Check whether `node' is a backreference or not. */ + if (node->type != OP_BACK_REF) + continue; + + if (node->constraint) + { + context = re_string_context_at (&mctx->input, cur_str_idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + continue; + } + + /* `node' is a backreference. + Check the substring which the substring matched. */ + bkc_idx = mctx->nbkref_ents; + err = get_subexp (mctx, node_idx, cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* And add the epsilon closures (which is `new_dest_nodes') of + the backreference to appropriate state_log. 
*/ +#ifdef DEBUG + assert (dfa->nexts[node_idx] != -1); +#endif + for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) + { + int subexp_len; + re_dfastate_t *dest_state; + struct re_backref_cache_entry *bkref_ent; + bkref_ent = mctx->bkref_ents + bkc_idx; + if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) + continue; + subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; + new_dest_nodes = (subexp_len == 0 + ? dfa->eclosures + dfa->edests[node_idx].elems[0] + : dfa->eclosures + dfa->nexts[node_idx]); + dest_str_idx = (cur_str_idx + bkref_ent->subexp_to + - bkref_ent->subexp_from); + context = re_string_context_at (&mctx->input, dest_str_idx - 1, + mctx->eflags); + dest_state = mctx->state_log[dest_str_idx]; + prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 + : mctx->state_log[cur_str_idx]->nodes.nelem); + /* Add `new_dest_node' to state_log. */ + if (dest_state == NULL) + { + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, new_dest_nodes, + context); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + else + { + re_node_set dest_nodes; + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, + new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&dest_nodes); + goto free_return; + } + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + /* We need to check recursively if the backreference can epsilon + transit. 
*/ + if (subexp_len == 0 + && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) + { + err = check_subexp_matching_top (mctx, new_dest_nodes, + cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + err = transit_state_bkref (mctx, new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + } + } + err = REG_NOERROR; + free_return: + return err; +} + +/* Enumerate all the candidates which the backreference BKREF_NODE can match + at BKREF_STR_IDX, and register them by match_ctx_add_entry(). + Note that we might collect inappropriate candidates here. + However, the cost of checking them strictly here is too high, then we + delay these checking for prune_impossible_nodes(). */ + +static reg_errcode_t +internal_function +get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int subexp_num, sub_top_idx; + const char *buf = (const char *) re_string_get_buffer (&mctx->input); + /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ + int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); + if (cache_idx != -1) + { + const struct re_backref_cache_entry *entry + = mctx->bkref_ents + cache_idx; + do + if (entry->node == bkref_node) + return REG_NOERROR; /* We already checked it. */ + while (entry++->more); + } + + subexp_num = dfa->nodes[bkref_node].opr.idx; + + /* For each sub expression */ + for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) + { + reg_errcode_t err; + re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; + re_sub_match_last_t *sub_last; + int sub_last_idx, sl_str, bkref_str_off; + + if (dfa->nodes[sub_top->node].opr.idx != subexp_num) + continue; /* It isn't related. */ + + sl_str = sub_top->str_idx; + bkref_str_off = bkref_str_idx; + /* At first, check the last node of sub expressions we already + evaluated. 
*/ + for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) + { + int sl_str_diff; + sub_last = sub_top->lasts[sub_last_idx]; + sl_str_diff = sub_last->str_idx - sl_str; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_diff > 0) + { + if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) + { + /* Not enough chars for a successful match. */ + if (bkref_str_off + sl_str_diff > mctx->input.len) + break; + + err = clean_state_log_if_needed (mctx, + bkref_str_off + + sl_str_diff); + if (BE (err != REG_NOERROR, 0)) + return err; + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) + /* We don't need to search this sub expression any more. */ + break; + } + bkref_str_off += sl_str_diff; + sl_str += sl_str_diff; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + + /* Reload buf, since the preceding call might have reallocated + the buffer. */ + buf = (const char *) re_string_get_buffer (&mctx->input); + + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (sub_last_idx < sub_top->nlasts) + continue; + if (sub_last_idx > 0) + ++sl_str; + /* Then, search for the other last nodes of the sub expression. */ + for (; sl_str <= bkref_str_idx; ++sl_str) + { + int cls_node, sl_str_off; + const re_node_set *nodes; + sl_str_off = sl_str - sub_top->str_idx; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_off > 0) + { + if (BE (bkref_str_off >= mctx->input.valid_len, 0)) + { + /* If we are at the end of the input, we cannot match. 
*/ + if (bkref_str_off >= mctx->input.len) + break; + + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (buf [bkref_str_off++] != buf[sl_str - 1]) + break; /* We don't need to search this sub expression + any more. */ + } + if (mctx->state_log[sl_str] == NULL) + continue; + /* Does this state have a ')' of the sub expression? */ + nodes = &mctx->state_log[sl_str]->nodes; + cls_node = find_subexp_node (dfa, nodes, subexp_num, + OP_CLOSE_SUBEXP); + if (cls_node == -1) + continue; /* No. */ + if (sub_top->path == NULL) + { + sub_top->path = calloc (sizeof (state_array_t), + sl_str - sub_top->str_idx + 1); + if (sub_top->path == NULL) + return REG_ESPACE; + } + /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node + in the current context? */ + err = check_arrival (mctx, sub_top->path, sub_top->node, + sub_top->str_idx, cls_node, sl_str, + OP_CLOSE_SUBEXP); + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); + if (BE (sub_last == NULL, 0)) + return REG_ESPACE; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + if (err == REG_NOMATCH) + continue; + } + } + return REG_NOERROR; +} + +/* Helper functions for get_subexp(). */ + +/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. + If it can arrive, register the sub expression expressed with SUB_TOP + and SUB_LAST. */ + +static reg_errcode_t +internal_function +get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, int bkref_node, int bkref_str) +{ + reg_errcode_t err; + int to_idx; + /* Can the subexpression arrive the back reference? 
*/ + err = check_arrival (mctx, &sub_last->path, sub_last->node, + sub_last->str_idx, bkref_node, bkref_str, + OP_OPEN_SUBEXP); + if (err != REG_NOERROR) + return err; + err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, + sub_last->str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; + return clean_state_log_if_needed (mctx, to_idx); +} + +/* Find the first node in NODES whose type is TYPE (OP_OPEN_SUBEXP for '(' + or OP_CLOSE_SUBEXP for ')' at the call sites visible here) and whose index + is SUBEXP_IDX. Return that node, or -1 if NODES has no such node. + TODO: This function isn't efficient... + Because there might be more than one node whose type is + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. E.g. RE: (a){2} */ + +static int +internal_function +find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) +{ + int cls_idx; + for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) + { + int cls_node = nodes->elems[cls_idx]; + const re_token_t *node = dfa->nodes + cls_node; + if (node->type == type + && node->opr.idx == subexp_idx) + return cls_node; + } + return -1; +} + +/* Check whether the node TOP_NODE at TOP_STR can arrive to the node + LAST_NODE at LAST_STR. We record the path onto PATH since it will be + heavily reused. + Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ + +static reg_errcode_t +internal_function +check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, + int top_str, int last_node, int last_str, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + int subexp_num, backup_cur_idx, str_idx, null_cnt; + re_dfastate_t *cur_state = NULL; + re_node_set *cur_nodes, next_nodes; + re_dfastate_t **backup_state_log; + unsigned int context; + + subexp_num = dfa->nodes[top_node].opr.idx; + /* Extend the buffer if we need.
*/ + if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) + { + re_dfastate_t **new_array; + int old_alloc = path->alloc; + path->alloc += last_str + mctx->max_mb_elem_len + 1; + new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); + if (BE (new_array == NULL, 0)) + { + path->alloc = old_alloc; + return REG_ESPACE; + } + path->array = new_array; + memset (new_array + old_alloc, '\0', + sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); + } + + str_idx = path->next_idx ? path->next_idx : top_str; + + /* Temporary modify MCTX. */ + backup_state_log = mctx->state_log; + backup_cur_idx = mctx->input.cur_idx; + mctx->state_log = path->array; + mctx->input.cur_idx = str_idx; + + /* Setup initial node set. */ + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + if (str_idx == top_str) + { + err = re_node_set_init_1 (&next_nodes, top_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + else + { + cur_state = mctx->state_log[str_idx]; + if (cur_state && cur_state->has_backref) + { + err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + re_node_set_init_empty (&next_nodes); + } + if (str_idx == top_str || (cur_state && cur_state->has_backref)) + { + if (next_nodes.nelem) + { + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + } + + for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) + { + re_node_set_empty (&next_nodes); + 
if (mctx->state_log[str_idx + 1]) + { + err = re_node_set_merge (&next_nodes, + &mctx->state_log[str_idx + 1]->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + if (cur_state) + { + err = check_arrival_add_next_nodes (mctx, str_idx, + &cur_state->non_eps_nodes, + &next_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + ++str_idx; + if (next_nodes.nelem) + { + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + null_cnt = cur_state == NULL ? null_cnt + 1 : 0; + } + re_node_set_free (&next_nodes); + cur_nodes = (mctx->state_log[last_str] == NULL ? NULL + : &mctx->state_log[last_str]->nodes); + path->next_idx = str_idx; + + /* Fix MCTX. */ + mctx->state_log = backup_state_log; + mctx->input.cur_idx = backup_cur_idx; + + /* Then check the current node set has the node LAST_NODE. */ + if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) + return REG_NOERROR; + + return REG_NOMATCH; +} + +/* Helper functions for check_arrival. */ + +/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them + to NEXT_NODES. + TODO: This function is similar to the functions transit_state*(), + however this function has many additional works. + Can't we unify them? 
*/ + /* Returns REG_NOERROR on success, REG_ESPACE when a node-set insertion fails, or the error reported by re_node_set_merge or re_acquire_state. UNION_SET is freed on every exit path. */ +static reg_errcode_t +internal_function +check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, + re_node_set *cur_nodes, re_node_set *next_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + int result; + int cur_idx; + reg_errcode_t err = REG_NOERROR; + re_node_set union_set; + re_node_set_init_empty (&union_set); /* Scratch set, reused for each multibyte destination below. */ + for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) + { + int naccepted = 0; /* Stays 0 unless the multibyte check below sets it. */ + int cur_node = cur_nodes->elems[cur_idx]; +#ifdef DEBUG + re_token_type_t type = dfa->nodes[cur_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[cur_node].accept_mb) + { + naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, + str_idx); + if (naccepted > 1) /* The destination index is STR_IDX + naccepted, so it is recorded in state_log directly. */ + { + re_dfastate_t *dest_state; + int next_node = dfa->nexts[cur_node]; + int next_idx = str_idx + naccepted; + dest_state = mctx->state_log[next_idx]; + re_node_set_empty (&union_set); /* Start from an empty set for this destination. */ + if (dest_state) + { + err = re_node_set_merge (&union_set, &dest_state->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + result = re_node_set_insert (&union_set, next_node); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + mctx->state_log[next_idx] = re_acquire_state (&err, dfa, + &union_set); + if (BE (mctx->state_log[next_idx] == NULL + && err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + } +#endif /* RE_ENABLE_I18N */ + if (naccepted + || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) /* Any nonzero naccepted, or a successful check_node_accept, adds the successor to NEXT_NODES. */ + { + result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + } + } + re_node_set_free (&union_set); + return REG_NOERROR; +} + +/* For all the nodes in CUR_NODES, add the epsilon closures of them to + CUR_NODES, however exclude the nodes which are: + - inside the sub expression whose
number is EX_SUBEXP, if FL_OPEN. + - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. +*/ + /* TYPE is the node type handed to find_subexp_node and check_arrival_expand_ecl_sub (OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP at the call sites visible here); it takes the place of the FL_OPEN wording above. On success the old CUR_NODES storage is freed and CUR_NODES receives the expanded set. On failure the partial set is freed, CUR_NODES is left untouched, and the error is returned. */ +static reg_errcode_t +internal_function +check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, + int ex_subexp, int type) +{ + reg_errcode_t err; + int idx, outside_node; + re_node_set new_nodes; +#ifdef DEBUG + assert (cur_nodes->nelem); +#endif + err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* Create a new node set NEW_NODES with the nodes which are epsilon + closures of the node in CUR_NODES. */ + + for (idx = 0; idx < cur_nodes->nelem; ++idx) + { + int cur_node = cur_nodes->elems[idx]; + const re_node_set *eclosure = dfa->eclosures + cur_node; + outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); /* -1 means the closure holds no node of TYPE with index EX_SUBEXP. */ + if (outside_node == -1) + { + /* There are no problematic nodes, just merge them. */ + err = re_node_set_merge (&new_nodes, eclosure); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + else + { + /* There are problematic nodes, re-calculate incrementally. */ + err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + } + re_node_set_free (cur_nodes); /* Release the old contents before overwriting the structure. */ + *cur_nodes = new_nodes; /* Struct copy: CUR_NODES now holds the expanded set. */ + return REG_NOERROR; +} + +/* Helper function for check_arrival_expand_ecl. + Check incrementally the epsilon closure of TARGET, and if it isn't + problematic append it to DST_NODES.
*/ + /* Walks from TARGET along elems[0] of each node's epsilon destinations, recursing into elems[1] when a node has two. The walk stops at a node already in DST_NODES, at a node without epsilon destinations, or at a node of TYPE whose index is EX_SUBEXP; that last node is inserted only when TYPE is OP_CLOSE_SUBEXP. Returns REG_ESPACE if an insertion fails. */ +static reg_errcode_t +internal_function +check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, + int target, int ex_subexp, int type) +{ + int cur_node; + for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) + { + int err; /* Holds both the int result of re_node_set_insert and the status of the recursive call. */ + + if (dfa->nodes[cur_node].type == type + && dfa->nodes[cur_node].opr.idx == ex_subexp) + { + if (type == OP_CLOSE_SUBEXP) + { + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + } + break; + } + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + if (dfa->edests[cur_node].nelem == 0) + break; + if (dfa->edests[cur_node].nelem == 2) + { + err = check_arrival_expand_ecl_sub (dfa, dst_nodes, + dfa->edests[cur_node].elems[1], + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + return err; + } + cur_node = dfa->edests[cur_node].elems[0]; + } + return REG_NOERROR; +} + + +/* For all the back references in the current state, calculate the + destination of the back references by the appropriate entry + in MCTX->BKREF_ENTS. */ + +static reg_errcode_t +internal_function +expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, + int cur_str, int subexp_num, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); + struct re_backref_cache_entry *ent; + + if (cache_idx_start == -1) + return REG_NOERROR; + + restart: + ent = mctx->bkref_ents + cache_idx_start; + do + { + int to_idx, next_node; + + /* Is this entry ENT appropriate? */ + if (!re_node_set_contains (cur_nodes, ent->node)) + continue; /* No. */ + + to_idx = cur_str + ent->subexp_to - ent->subexp_from; + /* Calculate the destination of the back reference, and append it + to MCTX->STATE_LOG. */ + if (to_idx == cur_str) + { + /* The backreference did epsilon transit, we must re-check all the + node in the current state.
*/ + re_node_set new_dests; + reg_errcode_t err2, err3; + next_node = dfa->edests[ent->node].elems[0]; + if (re_node_set_contains (cur_nodes, next_node)) + continue; + err = re_node_set_init_1 (&new_dests, next_node); + err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); + err3 = re_node_set_merge (cur_nodes, &new_dests); + re_node_set_free (&new_dests); + if (BE (err != REG_NOERROR || err2 != REG_NOERROR + || err3 != REG_NOERROR, 0)) + { + err = (err != REG_NOERROR ? err + : (err2 != REG_NOERROR ? err2 : err3)); + return err; + } + /* TODO: It is still inefficient... */ + goto restart; + } + else + { + re_node_set union_set; + next_node = dfa->nexts[ent->node]; + if (mctx->state_log[to_idx]) + { + int ret; + if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, + next_node)) + continue; + err = re_node_set_init_copy (&union_set, + &mctx->state_log[to_idx]->nodes); + ret = re_node_set_insert (&union_set, next_node); + if (BE (err != REG_NOERROR || ret < 0, 0)) + { + re_node_set_free (&union_set); + err = err != REG_NOERROR ? err : REG_ESPACE; + return err; + } + } + else + { + err = re_node_set_init_1 (&union_set, next_node); + if (BE (err != REG_NOERROR, 0)) + return err; + } + mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); + re_node_set_free (&union_set); + if (BE (mctx->state_log[to_idx] == NULL + && err != REG_NOERROR, 0)) + return err; + } + } + while (ent++->more); + return REG_NOERROR; +} + +/* Build transition table for the state. + Return 1 if succeeded, otherwise return 0. */ + +static int +internal_function +build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) +{ + reg_errcode_t err; + int i, j, ch, need_word_trtable = 0; + bitset_word_t elem, mask; + bool dests_node_malloced = false; + bool dest_states_malloced = false; + int ndests; /* Number of the destination states from `state'.
*/ + re_dfastate_t **trtable; + re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; + re_node_set follows, *dests_node; + bitset_t *dests_ch; + bitset_t acceptable; + + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset_t dests_ch[SBC_MAX]; + } *dests_alloc; + + /* We build DFA states which corresponds to the destination nodes + from `state'. `dests_node[i]' represents the nodes which i-th + destination state contains, and `dests_ch[i]' represents the + characters which i-th destination state accepts. */ + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); + else + { + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) + return 0; + dests_node_malloced = true; + } + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; + + /* Initialize transiton table. */ + state->word_trtable = state->trtable = NULL; + + /* At first, group all nodes belonging to `state' into several + destinations. */ + ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); + if (BE (ndests <= 0, 0)) + { + if (dests_node_malloced) + free (dests_alloc); + /* Return 0 in case of an error, 1 otherwise. 
*/ + if (ndests == 0) + { + state->trtable = (re_dfastate_t **) + calloc (sizeof (re_dfastate_t *), SBC_MAX); + return 1; + } + return 0; + } + + err = re_node_set_alloc (&follows, ndests + 1); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX + + ndests * 3 * sizeof (re_dfastate_t *))) + dest_states = (re_dfastate_t **) + alloca (ndests * 3 * sizeof (re_dfastate_t *)); + else + { + dest_states = (re_dfastate_t **) + malloc (ndests * 3 * sizeof (re_dfastate_t *)); + if (BE (dest_states == NULL, 0)) + { +out_free: + if (dest_states_malloced) + free (dest_states); + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + if (dests_node_malloced) + free (dests_alloc); + return 0; + } + dest_states_malloced = true; + } + dest_states_word = dest_states + ndests; + dest_states_nl = dest_states_word + ndests; + bitset_empty (acceptable); + + /* Then build the states for all destinations. */ + for (i = 0; i < ndests; ++i) + { + int next_node; + re_node_set_empty (&follows); + /* Merge the follows of this destination states. */ + for (j = 0; j < dests_node[i].nelem; ++j) + { + next_node = dfa->nexts[dests_node[i].elems[j]]; + if (next_node != -1) + { + err = re_node_set_merge (&follows, dfa->eclosures + next_node); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + } + } + dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); + if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + /* If the new state has context constraint, + build appropriate states for these contexts. 
*/ + if (dest_states[i]->has_constraint) + { + dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_WORD); + if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + + if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) + need_word_trtable = 1; + + dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_NEWLINE); + if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + } + else + { + dest_states_word[i] = dest_states[i]; + dest_states_nl[i] = dest_states[i]; + } + bitset_merge (acceptable, dests_ch[i]); + } + + if (!BE (need_word_trtable, 0)) + { + /* We don't care about whether the following character is a word + character, or we are in a single-byte character set so we can + discern by looking at the character code: allocate a + 256-entry transition table. */ + trtable = state->trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + if (dfa->word_char[i] & mask) + trtable[ch] = dest_states_word[j]; + else + trtable[ch] = dest_states[j]; + } + } + else + { + /* We care about whether the following character is a word + character, and we are in a multi-byte character set: discern + by looking at the character code: build two 256-entry + transition tables, one starting at trtable[0] and one + starting at trtable[SBC_MAX]. 
*/ + trtable = state->word_trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + trtable[ch] = dest_states[j]; + trtable[ch + SBC_MAX] = dest_states_word[j]; + } + } + + /* new line */ + if (bitset_contain (acceptable, NEWLINE_CHAR)) + { + /* The current state accepts newline character. */ + for (j = 0; j < ndests; ++j) + if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) + { + /* k-th destination accepts newline character. */ + trtable[NEWLINE_CHAR] = dest_states_nl[j]; + if (need_word_trtable) + trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; + /* There must be only one destination which accepts + newline. See group_nodes_into_DFAstates. */ + break; + } + } + + if (dest_states_malloced) + free (dest_states); + + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + + if (dests_node_malloced) + free (dests_alloc); + + return 1; +} + +/* Group all nodes belonging to STATE into several destinations. + Then for all destinations, set the nodes belonging to the destination + to DESTS_NODE[i] and set the characters accepted by the destination + to DEST_CH[i]. This function return the number of destinations. */ + +static int +internal_function +group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, + re_node_set *dests_node, bitset_t *dests_ch) +{ + reg_errcode_t err; + int result; + int i, j, k; + int ndests; /* Number of the destinations from `state'. */ + bitset_t accepts; /* Characters a node can accept. 
*/ + const re_node_set *cur_nodes = &state->nodes; + bitset_empty (accepts); + ndests = 0; + + /* For all the nodes belonging to `state', */ + for (i = 0; i < cur_nodes->nelem; ++i) + { + re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + /* Enumerate all single byte character this node can accept. */ + if (type == CHARACTER) + bitset_set (accepts, node->opr.c); + else if (type == SIMPLE_BRACKET) + { + bitset_merge (accepts, node->opr.sbcset); + } + else if (type == OP_PERIOD) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + bitset_merge (accepts, dfa->sb_char); + else +#endif + bitset_set_all (accepts); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#ifdef RE_ENABLE_I18N + else if (type == OP_UTF8_PERIOD) + { + memset (accepts, '\xff', sizeof (bitset_t) / 2); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#endif + else + continue; + + /* Check the `accepts' and sift the characters which are not + match it the context. 
*/ + if (constraint) + { + if (constraint & NEXT_NEWLINE_CONSTRAINT) + { + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bitset_empty (accepts); + if (accepts_newline) + bitset_set (accepts, NEWLINE_CHAR); + else + continue; + } + if (constraint & NEXT_ENDBUF_CONSTRAINT) + { + bitset_empty (accepts); + continue; + } + + if (constraint & NEXT_WORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && !node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= dfa->word_char[j]); + if (!any_set) + continue; + } + if (constraint & NEXT_NOTWORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~dfa->word_char[j]); + if (!any_set) + continue; + } + } + + /* Then divide `accepts' into DFA states, or create a new + state. Above, we make sure that accepts is not empty. */ + for (j = 0; j < ndests; ++j) + { + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; + /* Flags, see below. */ + bitset_word_t has_intersec, not_subset, not_consumed; + + /* Optimization, skip if this state doesn't accept the character. */ + if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) + continue; + + /* Enumerate the intersection set of this state and `accepts'. */ + has_intersec = 0; + for (k = 0; k < BITSET_WORDS; ++k) + has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; + /* And skip if the intersection set is empty. 
*/ + if (!has_intersec) + continue; + + /* Then check if this state is a subset of `accepts'. */ + not_subset = not_consumed = 0; + for (k = 0; k < BITSET_WORDS; ++k) + { + not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; + not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; + } + + /* If this state isn't a subset of `accepts', create a + new group state, which has the `remains'. */ + if (not_subset) + { + bitset_copy (dests_ch[ndests], remains); + bitset_copy (dests_ch[j], intersec); + err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + } + + /* Put the position in the current group. */ + result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (BE (result < 0, 0)) + goto error_return; + + /* If all characters are consumed, go to next node. */ + if (!not_consumed) + break; + } + /* Some characters remain, create a new group. */ + if (j == ndests) + { + bitset_copy (dests_ch[ndests], accepts); + err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + bitset_empty (accepts); + } + } + return ndests; + error_return: + for (j = 0; j < ndests; ++j) + re_node_set_free (dests_node + j); + return -1; +} + +#ifdef RE_ENABLE_I18N +/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. + Return the number of the bytes the node accepts. + STR_IDX is the current index of the input string. + + This function handles the nodes which can accept one character, or + one collating element like '.', '[a-z]', opposite to the other nodes + can only accept one byte. 
*/ + +static int +internal_function +check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int str_idx) +{ + const re_token_t *node = dfa->nodes + node_idx; + int char_len, elem_len; + int i; + + if (BE (node->type == OP_UTF8_PERIOD, 0)) + { + unsigned char c = re_string_byte_at (input, str_idx), d; + if (BE (c < 0xc2, 1)) + return 0; + + if (str_idx + 2 > input->len) + return 0; + + d = re_string_byte_at (input, str_idx + 1); + if (c < 0xe0) + return (d < 0x80 || d > 0xbf) ? 0 : 2; + else if (c < 0xf0) + { + char_len = 3; + if (c == 0xe0 && d < 0xa0) + return 0; + } + else if (c < 0xf8) + { + char_len = 4; + if (c == 0xf0 && d < 0x90) + return 0; + } + else if (c < 0xfc) + { + char_len = 5; + if (c == 0xf8 && d < 0x88) + return 0; + } + else if (c < 0xfe) + { + char_len = 6; + if (c == 0xfc && d < 0x84) + return 0; + } + else + return 0; + + if (str_idx + char_len > input->len) + return 0; + + for (i = 1; i < char_len; ++i) + { + d = re_string_byte_at (input, str_idx + i); + if (d < 0x80 || d > 0xbf) + return 0; + } + return char_len; + } + + char_len = re_string_char_size_at (input, str_idx); + if (node->type == OP_PERIOD) + { + if (char_len <= 1) + return 0; + /* FIXME: I don't think this if is needed, as both '\n' + and '\0' are char_len == 1. */ + /* '.' accepts any one character except the following two cases. 
*/ + if ((!(dfa->syntax & RE_DOT_NEWLINE) && + re_string_byte_at (input, str_idx) == '\n') || + ((dfa->syntax & RE_DOT_NOT_NULL) && + re_string_byte_at (input, str_idx) == '\0')) + return 0; + return char_len; + } + + elem_len = re_string_elem_size_at (input, str_idx); + if ((elem_len <= 1 && char_len <= 1) || char_len == 0) + return 0; + + if (node->type == COMPLEX_BRACKET) + { + const re_charset_t *cset = node->opr.mbcset; +# ifdef _LIBC + const unsigned char *pin + = ((const unsigned char *) re_string_get_buffer (input) + str_idx); + int j; + uint32_t nrules; +# endif /* _LIBC */ + int match_len = 0; + wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) + ? re_string_wchar_at (input, str_idx) : 0); + + /* match with multibyte character? */ + for (i = 0; i < cset->nmbchars; ++i) + if (wc == cset->mbchars[i]) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + /* match with character_class? */ + for (i = 0; i < cset->nchar_classes; ++i) + { + wctype_t wt = cset->char_classes[i]; + if (__iswctype (wc, wt)) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + +# ifdef _LIBC + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + unsigned int in_collseq = 0; + const int32_t *table, *indirect; + const unsigned char *weights, *extra; + const char *collseqwc; + int32_t idx; + /* This #include defines a local function! */ +# include <locale/weight.h> + + /* match with collating_symbol? */ + if (cset->ncoll_syms) + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + for (i = 0; i < cset->ncoll_syms; ++i) + { + const unsigned char *coll_sym = extra + cset->coll_syms[i]; + /* Compare the length of input collating element and + the length of current collating element. */ + if (*coll_sym != elem_len) + continue; + /* Compare each byte. 
*/ + for (j = 0; j < *coll_sym; j++) + if (pin[j] != coll_sym[1 + j]) + break; + if (j == *coll_sym) + { + /* Match if every byte is equal. */ + match_len = j; + goto check_node_accept_bytes_match; + } + } + + if (cset->nranges) + { + if (elem_len <= char_len) + { + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + in_collseq = __collseq_table_lookup (collseqwc, wc); + } + else + in_collseq = find_collation_sequence_value (pin, elem_len); + } + /* match with range expression? */ + for (i = 0; i < cset->nranges; ++i) + if (cset->range_starts[i] <= in_collseq + && in_collseq <= cset->range_ends[i]) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + + /* match with equivalence_class? */ + if (cset->nequiv_classes) + { + const unsigned char *cp = pin; + table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); + idx = findidx (&cp); + if (idx > 0) + for (i = 0; i < cset->nequiv_classes; ++i) + { + int32_t equiv_class_idx = cset->equiv_classes[i]; + size_t weight_len = weights[idx]; + if (weight_len == weights[equiv_class_idx]) + { + int cnt = 0; + while (cnt <= weight_len + && (weights[equiv_class_idx + 1 + cnt] + == weights[idx + 1 + cnt])) + ++cnt; + if (cnt > weight_len) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + } + } + } + } + else +# endif /* _LIBC */ + { + /* match with range expression? 
*/ +#if __GNUC__ >= 2 + wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; +#else + wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + cmp_buf[2] = wc; +#endif + for (i = 0; i < cset->nranges; ++i) + { + cmp_buf[0] = cset->range_starts[i]; + cmp_buf[4] = cset->range_ends[i]; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + } + check_node_accept_bytes_match: + if (!cset->non_match) + return match_len; + else + { + if (match_len > 0) + return 0; + else + return (elem_len > char_len) ? elem_len : char_len; + } + } + return 0; +} + +# ifdef _LIBC +static unsigned int +internal_function +find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) +{ + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules == 0) + { + if (mbs_len == 1) + { + /* No valid character. Match it as a single byte character. */ + const unsigned char *collseq = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + return collseq[mbs[0]]; + } + return UINT_MAX; + } + else + { + int32_t idx; + const unsigned char *extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + int32_t extrasize = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; + + for (idx = 0; idx < extrasize;) + { + int mbs_cnt, found = 0; + int32_t elem_mbs_len; + /* Skip the name of the collating element. */ + idx = idx + extra[idx] + 1; + elem_mbs_len = extra[idx++]; + if (mbs_len == elem_mbs_len) + { + for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) + if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) + break; + if (mbs_cnt == elem_mbs_len) + /* Found the entry. */ + found = 1; + } + /* Skip the byte sequence of the collating element. */ + idx += elem_mbs_len; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the collation sequence value. 
*/ + idx += sizeof (uint32_t); + /* Skip the wide char sequence of the collating element. */ + idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + /* If we found the entry, return the sequence value. */ + if (found) + return *(uint32_t *) (extra + idx); + /* Skip the collation sequence value. */ + idx += sizeof (uint32_t); + } + return UINT_MAX; + } +} +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ + +/* Check whether the node accepts the byte which is IDX-th + byte of the INPUT. */ + +static int +internal_function +check_node_accept (const re_match_context_t *mctx, const re_token_t *node, + int idx) +{ + unsigned char ch; + ch = re_string_byte_at (&mctx->input, idx); + switch (node->type) + { + case CHARACTER: + if (node->opr.c != ch) + return 0; + break; + + case SIMPLE_BRACKET: + if (!bitset_contain (node->opr.sbcset, ch)) + return 0; + break; + +#ifdef RE_ENABLE_I18N + case OP_UTF8_PERIOD: + if (ch >= 0x80) + return 0; + /* FALLTHROUGH */ +#endif + case OP_PERIOD: + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return 0; + break; + + default: + return 0; + } + + if (node->constraint) + { + /* The node has constraints. Check whether the current context + satisfies the constraints. */ + unsigned int context = re_string_context_at (&mctx->input, idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + return 0; + } + + return 1; +} + +/* Extend the buffers, if the buffers have run out. */ + +static reg_errcode_t +internal_function +extend_buffers (re_match_context_t *mctx) +{ + reg_errcode_t ret; + re_string_t *pstr = &mctx->input; + + /* Double the lengths of the buffers. */ + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + if (mctx->state_log != NULL) + { + /* And double the length of state_log. */ + /* XXX We have no indication of the size of this buffer. 
If this + allocation fails we have no indication that the state_log array + does not have the right size. */ + re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len + 1); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->state_log = new_array; + } + + /* Then reconstruct the buffers. */ + if (pstr->icase) + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + } + return REG_NOERROR; +} + + +/* Functions for matching context. */ + +/* Initialize MCTX. */ + +static reg_errcode_t +internal_function +match_ctx_init (re_match_context_t *mctx, int eflags, int n) +{ + mctx->eflags = eflags; + mctx->match_last = -1; + if (n > 0) + { + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); + if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) + return REG_ESPACE; + } + /* Already zero-ed by the caller. + else + mctx->bkref_ents = NULL; + mctx->nbkref_ents = 0; + mctx->nsub_tops = 0; */ + mctx->abkref_ents = n; + mctx->max_mb_elem_len = 1; + mctx->asub_tops = n; + return REG_NOERROR; +} + +/* Clean the entries which depend on the current input in MCTX. + This function must be invoked when the matcher changes the start index + of the input, or changes the input string. 
*/ + +static void +internal_function +match_ctx_clean (re_match_context_t *mctx) +{ + int st_idx; /* Walk every registered sub-match top and release it with the sub-match lasts and cached paths it owns. */ + for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) + { + int sl_idx; + re_sub_match_top_t *top = mctx->sub_tops[st_idx]; + for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) + { + re_sub_match_last_t *last = top->lasts[sl_idx]; + re_free (last->path.array); + re_free (last); + } + re_free (top->lasts); + if (top->path) + { + re_free (top->path->array); + re_free (top->path); + } + free (top); + } + + mctx->nsub_tops = 0; /* The sub_tops and bkref_ents arrays stay allocated; only their element counts are reset. */ + mctx->nbkref_ents = 0; +} + +/* Free all the memory associated with MCTX. */ + +static void +internal_function +match_ctx_free (re_match_context_t *mctx) +{ + /* First, free all the memory associated with MCTX->SUB_TOPS. */ + match_ctx_clean (mctx); + re_free (mctx->sub_tops); /* Then release the two arrays themselves. */ + re_free (mctx->bkref_ents); +} + +/* Add a new backreference entry to MCTX. + Note that we assume that caller never call this function with duplicate + entry, and call with STR_IDX which isn't smaller than any existing entry. 
+*/ + +static reg_errcode_t +internal_function +match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, + int to) +{ + if (mctx->nbkref_ents >= mctx->abkref_ents) + { + struct re_backref_cache_entry* new_entry; + new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, + mctx->abkref_ents * 2); + if (BE (new_entry == NULL, 0)) + { + /* A failed realloc leaves the old array valid and still owned by MCTX; match_ctx_free releases it, so freeing it here would cause a double free. */ + return REG_ESPACE; + } + mctx->bkref_ents = new_entry; + memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', + sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); + mctx->abkref_ents *= 2; + } + if (mctx->nbkref_ents > 0 + && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) + mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; /* Chain the previous entry to this one: both share STR_IDX. */ + + mctx->bkref_ents[mctx->nbkref_ents].node = node; + mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; + mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; + mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; + + /* This is a cache that saves negative results of check_dst_limits_calc_pos. + If bit N is clear, means that this entry won't epsilon-transition to + an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If + it is set, check_dst_limits_calc_pos_1 will recurse and try to find one + such node. + + A backreference does not epsilon-transition unless it is empty, so set + to all zeros if FROM != TO. */ + mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map + = (from == to ? ~0 : 0); + + mctx->bkref_ents[mctx->nbkref_ents++].more = 0; + if (mctx->max_mb_elem_len < to - from) + mctx->max_mb_elem_len = to - from; + return REG_NOERROR; +} + +/* Search for the first entry which has the same str_idx, or -1 if none is + found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. 
*/ + +static int +internal_function +search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) +{ + int left, right, mid, last; /* Binary search over entries ordered by str_idx. */ + last = right = mctx->nbkref_ents; + for (left = 0; left < right;) + { + mid = (left + right) / 2; + if (mctx->bkref_ents[mid].str_idx < str_idx) + left = mid + 1; + else + right = mid; + } /* Here LEFT is the first index whose str_idx is not smaller than STR_IDX, or LAST if there is none. */ + if (left < last && mctx->bkref_ents[left].str_idx == str_idx) + return left; + else + return -1; +} + +/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches + at STR_IDX. */ + +static reg_errcode_t +internal_function +match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) +{ +#ifdef DEBUG + assert (mctx->sub_tops != NULL); + assert (mctx->asub_tops > 0); +#endif + if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) + { + int new_asub_tops = mctx->asub_tops * 2; /* Array is full: double its capacity. */ + re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, + re_sub_match_top_t *, + new_asub_tops); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops = new_array; + mctx->asub_tops = new_asub_tops; + } + mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); /* Zero-filled; node and str_idx are set below. */ + if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops[mctx->nsub_tops]->node = node; + mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; + return REG_NOERROR; +} + +/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches + at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 
*/ + +static re_sub_match_last_t * +internal_function +match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) +{ + re_sub_match_last_t *new_entry; + if (BE (subtop->nlasts == subtop->alasts, 0)) + { + int new_alasts = 2 * subtop->alasts + 1; /* The +1 lets the array grow even from zero capacity. */ + re_sub_match_last_t **new_array = re_realloc (subtop->lasts, + re_sub_match_last_t *, + new_alasts); + if (BE (new_array == NULL, 0)) + return NULL; + subtop->lasts = new_array; + subtop->alasts = new_alasts; + } + new_entry = calloc (1, sizeof (re_sub_match_last_t)); + if (BE (new_entry != NULL, 1)) + { + subtop->lasts[subtop->nlasts] = new_entry; + new_entry->node = node; + new_entry->str_idx = str_idx; + ++subtop->nlasts; + } + return new_entry; /* NULL when calloc failed; SUBTOP is then left without a new entry. */ +} + +static void +internal_function +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, int last_str_idx) +{ /* Store the caller's state arrays and last node/index in SCTX and start with an empty limits set. */ + sctx->sifted_states = sifted_sts; + sctx->limited_states = limited_sts; + sctx->last_node = last_node; + sctx->last_str_idx = last_str_idx; + re_node_set_init_empty (&sctx->limits); +} + + +/* Binary backward compatibility. */ +#if _LIBC +# include <shlib-compat.h> +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) +link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") +int re_max_failures = 2000; +# endif +#endif +#endif diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gkregex.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gkregex.h new file mode 100644 index 00000000..807c404e --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/gkregex.h @@ -0,0 +1,556 @@ +/* Definitions for data structures and routines for the regular + expression library. + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006 + Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_H +#define _REGEX_H 1 + +#include <sys/types.h> + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned long int reg_syntax_t; + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. 
*/ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. 
*/ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \<digit> matches <digit>. + If not set, then \<digit> is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. 
*/ +#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. + We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +#define RE_DEBUG (RE_NO_GNU_OPS << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + +/* If this bit is set, then \{ cannot be first in an bre or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) + +/* If this bit is set, then no_sub will be set to 1 during + re_compile_pattern. */ +#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +/* Define combinations of the above bits for the standard possibilities. 
+ (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. */ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. 
*/ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +# undef RE_DUP_MAX +#endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +#define RE_DUP_MAX (0x7fff) + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. */ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). 
+ If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + +/* Use PMATCH[0] to delimit the start and end of the search in the + buffer. */ +#define REG_STARTEND (1 << 2) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ +#ifdef _XOPEN_SOURCE + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Inalid collating element. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +#ifndef RE_TRANSLATE_TYPE +# define RE_TRANSLATE_TYPE unsigned char * +#endif + +struct re_pattern_buffer +{ + /* Space that holds the compiled pattern. 
It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ + unsigned char *buffer; + + /* Number of bytes to which `buffer' points. */ + unsigned long int allocated; + + /* Number of bytes actually used in `buffer'. */ + unsigned long int used; + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t syntax; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ + RE_TRANSLATE_TYPE translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ + unsigned can_be_null : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#define REGS_UNALLOCATED 0 +#define REGS_REALLOCATE 1 +#define REGS_FIXED 2 + unsigned regs_allocated : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned no_sub : 1; + + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If true, an anchor at a newline matches. 
*/ + unsigned newline_anchor : 1; +}; + +typedef struct re_pattern_buffer regex_t; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +#ifndef RE_NREGS +# define RE_NREGS 30 +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +/* Sets the current default syntax to SYNTAX, and return the old syntax. + You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. 
Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, int __range, + struct re_registers *__regs); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, struct re_registers *__regs); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. 
*/ +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); + +#if defined _REGEX_RE_COMP || defined _LIBC +# ifndef _CRAY +/* 4.2 bsd compatibility. */ +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". */ +#ifndef __restrict +# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) +# if defined restrict || 199901L <= __STDC_VERSION__ +# define __restrict restrict +# else +# define __restrict +# endif +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. */ +#ifndef __restrict_arr +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \ + && !defined __GNUG__ +# define __restrict_arr __restrict +# else +# define __restrict_arr +# endif +#endif + +/* POSIX compatibility. */ +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); + +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __string, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); + +extern size_t regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); + +extern void regfree (regex_t *__preg); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/graph.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/graph.c new file mode 100644 index 00000000..fa40f07c --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/graph.c @@ -0,0 +1,1940 @@ +/*! 
+ * \file + * + * \brief Various routines with dealing with sparse graphs + * + * \author George Karypis + * \version\verbatim $Id: graph.c 22415 2019-09-05 16:55:00Z karypis $ \endverbatim + */ + +#include + +#define OMPMINOPS 50000 + +/*************************************************************************/ +/*! Allocate memory for a graph and initializes it + \returns the allocated graph. The various fields are set to NULL. +*/ +/**************************************************************************/ +gk_graph_t *gk_graph_Create() +{ + gk_graph_t *graph; + + graph = (gk_graph_t *)gk_malloc(sizeof(gk_graph_t), "gk_graph_Create: graph"); + + gk_graph_Init(graph); + + return graph; +} + + +/*************************************************************************/ +/*! Initializes the graph. + \param graph is the graph to be initialized. +*/ +/*************************************************************************/ +void gk_graph_Init(gk_graph_t *graph) +{ + memset(graph, 0, sizeof(gk_graph_t)); + graph->nvtxs = -1; +} + + +/*************************************************************************/ +/*! Frees all the memory allocated for a graph. + \param graph is the graph to be freed. +*/ +/*************************************************************************/ +void gk_graph_Free(gk_graph_t **graph) +{ + if (*graph == NULL) + return; + gk_graph_FreeContents(*graph); + gk_free((void **)graph, LTERM); +} + + +/*************************************************************************/ +/*! Frees only the memory allocated for the graph's different fields and + sets them to NULL. + \param graph is the graph whose contents will be freed. 
+*/ +/*************************************************************************/ +void gk_graph_FreeContents(gk_graph_t *graph) +{ + gk_free((void *)&graph->xadj, &graph->adjncy, + &graph->iadjwgt, &graph->fadjwgt, + &graph->ivwgts, &graph->fvwgts, + &graph->ivsizes, &graph->fvsizes, + &graph->vlabels, + LTERM); +} + + +/**************************************************************************/ +/*! Reads a sparse graph from the supplied file + \param filename is the file that stores the data. + \param format is the graph format. The supported values are: + GK_GRAPH_FMT_METIS, GK_GRAPH_FMT_IJV. + \param hasvals is 1 if the input file has values + \param numbering is 1 if the input file numbering starts from one + \param isfewgts is 1 if the edge-weights should be read as floats + \param isfvwgts is 1 if the vertex-weights should be read as floats + \param isfvsizes is 1 if the vertex-sizes should be read as floats + \returns the graph that was read. +*/ +/**************************************************************************/ +gk_graph_t *gk_graph_Read(char *filename, int format, int hasvals, + int numbering, int isfewgts, int isfvwgts, int isfvsizes) +{ + ssize_t i, k, l; + size_t nfields, nvtxs, nedges, fmt, ncon, lnlen; + ssize_t *xadj; + int32_t ival, *iinds=NULL, *jinds=NULL, *ivals=NULL, *adjncy, *iadjwgt; + float fval, *fvals=NULL, *fadjwgt; + int readsizes=0, readwgts=0, readvals=0; + char *line=NULL, *head, *tail, fmtstr[256]; + FILE *fpin=NULL; + gk_graph_t *graph=NULL; + + + if (!gk_fexists(filename)) + gk_errexit(SIGERR, "File %s does not exist!\n", filename); + + switch (format) { + case GK_GRAPH_FMT_METIS: + fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin"); + do { + if (gk_getline(&line, &lnlen, fpin) <= 0) + gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename); + } while (line[0] == '%'); + + fmt = ncon = 0; + nfields = sscanf(line, "%zu %zu %zu %zu", &nvtxs, &nedges, &fmt, &ncon); + if (nfields < 2) + gk_errexit(SIGERR, 
"Header line must contain at least 2 integers (#vtxs and #edges).\n"); + + nedges *= 2; + + if (fmt > 111) + gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt); + + sprintf(fmtstr, "%03zu", fmt%1000); + readsizes = (fmtstr[0] == '1'); + readwgts = (fmtstr[1] == '1'); + readvals = (fmtstr[2] == '1'); + numbering = 1; + ncon = (ncon == 0 ? 1 : ncon); + + graph = gk_graph_Create(); + + graph->nvtxs = nvtxs; + + graph->xadj = gk_zmalloc(nvtxs+1, "gk_graph_Read: xadj"); + graph->adjncy = gk_i32malloc(nedges, "gk_graph_Read: adjncy"); + if (readvals) { + if (isfewgts) + graph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Read: fadjwgt"); + else + graph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Read: iadjwgt"); + } + + if (readsizes) { + if (isfvsizes) + graph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Read: fvsizes"); + else + graph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Read: ivsizes"); + } + + if (readwgts) { + if (isfvwgts) + graph->fvwgts = gk_fmalloc(nvtxs*ncon, "gk_graph_Read: fvwgts"); + else + graph->ivwgts = gk_i32malloc(nvtxs*ncon, "gk_graph_Read: ivwgts"); + } + + + /*---------------------------------------------------------------------- + * Read the sparse graph file + *---------------------------------------------------------------------*/ + numbering = (numbering ? 
- 1 : 0); + for (graph->xadj[0]=0, k=0, i=0; ifvsizes[i] = (float)strtod(head, &tail); +#else + graph->fvsizes[i] = strtof(head, &tail); +#endif + if (tail == head) + gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1); + if (graph->fvsizes[i] < 0) + gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1); + } + else { + graph->ivsizes[i] = strtol(head, &tail, 0); + if (tail == head) + gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1); + if (graph->ivsizes[i] < 0) + gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1); + } + head = tail; + } + + /* Read vertex weights */ + if (readwgts) { + for (l=0; lfvwgts[i*ncon+l] = (float)strtod(head, &tail); +#else + graph->fvwgts[i*ncon+l] = strtof(head, &tail); +#endif + if (tail == head) + gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights " + "for the %d constraints.\n", i+1, ncon); + if (graph->fvwgts[i*ncon+l] < 0) + gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l); + } + else { + graph->ivwgts[i*ncon+l] = strtol(head, &tail, 0); + if (tail == head) + gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights " + "for the %d constraints.\n", i+1, ncon); + if (graph->ivwgts[i*ncon+l] < 0) + gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l); + } + head = tail; + } + } + + + /* Read the rest of the row */ + while (1) { + ival = (int)strtol(head, &tail, 0); + if (tail == head) + break; + head = tail; + + if ((graph->adjncy[k] = ival + numbering) < 0) + gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i); + + if (readvals) { + if (isfewgts) { +#ifdef __MSC__ + fval = (float)strtod(head, &tail); +#else + fval = strtof(head, &tail); +#endif + if (tail == head) + gk_errexit(SIGERR, "Value could not be found for edge! 
Vertex:%zd, NNZ:%zd\n", i, k); + + graph->fadjwgt[k] = fval; + } + else { + ival = strtol(head, &tail, 0); + if (tail == head) + gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k); + + graph->iadjwgt[k] = ival; + } + head = tail; + } + k++; + } + graph->xadj[i+1] = k; + } + + if (k != nedges) + gk_errexit(SIGERR, "gk_graph_Read: Something wrong with the number of edges in " + "the input file. nedges=%zd, Actualnedges=%zd.\n", nedges, k); + + gk_fclose(fpin); + + gk_free((void **)&line, LTERM); + + break; + + case GK_GRAPH_FMT_IJV: + case GK_GRAPH_FMT_HIJV: + gk_getfilestats(filename, &nvtxs, &nedges, NULL, NULL); + + if (format == GK_GRAPH_FMT_HIJV) { /* remove the #rows/#cols values and row */ + nedges -= 2; + nvtxs -= 1; + } + + if (hasvals == 1 && 3*nvtxs != nedges) + gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 3.\n", nedges, hasvals); + if (hasvals == 0 && 2*nvtxs != nedges) + gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 2.\n", nedges, hasvals); + + nedges = nvtxs; + numbering = (numbering ? 
-1 : 0); + + /* read the data into three arrays */ + iinds = gk_i32malloc(nedges, "iinds"); + jinds = gk_i32malloc(nedges, "jinds"); + if (hasvals) { + if (isfewgts) + fvals = gk_fmalloc(nedges, "fvals"); + else + ivals = gk_i32malloc(nedges, "ivals"); + } + + fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin"); + + if (format == GK_GRAPH_FMT_HIJV) { /* read and ignore the #rows/#cols values */ + if (fscanf(fpin, "%zd %zd", &i, &i) != 2) + gk_errexit(SIGERR, "Error: Failed to read the header line.\n"); + } + + for (nvtxs=0, i=0; invtxs = ++nvtxs; + xadj = graph->xadj = gk_zsmalloc(nvtxs+1, 0, "xadj"); + adjncy = graph->adjncy = gk_i32malloc(nedges, "adjncy"); + if (hasvals) { + if (isfewgts) + fadjwgt = graph->fadjwgt = gk_fmalloc(nedges, "fadjwgt"); + else + iadjwgt = graph->iadjwgt = gk_i32malloc(nedges, "iadjwgt"); + } + + for (i=0; iiadjwgt || graph->fadjwgt); + hasvwgts = (graph->ivwgts || graph->fvwgts); + hasvsizes = (graph->ivsizes || graph->fvsizes); + + switch (format) { + case GK_GRAPH_FMT_METIS: + /* write the header line */ + fprintf(fpout, "%d %zd", graph->nvtxs, graph->xadj[graph->nvtxs]/2); + if (hasvwgts || hasvsizes || hasewgts) + fprintf(fpout, " %d%d%d", hasvsizes, hasvwgts, hasewgts); + fprintf(fpout, "\n"); + + + for (i=0; invtxs; i++) { + if (hasvsizes) { + if (graph->ivsizes) + fprintf(fpout, " %d", graph->ivsizes[i]); + else + fprintf(fpout, " %f", graph->fvsizes[i]); + } + + if (hasvwgts) { + if (graph->ivwgts) + fprintf(fpout, " %d", graph->ivwgts[i]); + else + fprintf(fpout, " %f", graph->fvwgts[i]); + } + + for (j=graph->xadj[i]; jxadj[i+1]; j++) { + fprintf(fpout, " %d", graph->adjncy[j]+1); + if (hasewgts) { + if (graph->iadjwgt) + fprintf(fpout, " %d", graph->iadjwgt[j]); + else + fprintf(fpout, " %f", graph->fadjwgt[j]); + } + } + fprintf(fpout, "\n"); + } + break; + + case GK_GRAPH_FMT_IJV: + for (i=0; invtxs; i++) { + for (j=graph->xadj[i]; jxadj[i+1]; j++) { + fprintf(fpout, "%d %d ", i+numbering, graph->adjncy[j]+numbering); + 
if (hasewgts) { + if (graph->iadjwgt) + fprintf(fpout, " %d\n", graph->iadjwgt[j]); + else + fprintf(fpout, " %f\n", graph->fadjwgt[j]); + } + else { + fprintf(fpout, " 1\n"); + } + } + } + break; + + default: + gk_errexit(SIGERR, "Unknown file format. %d\n", format); + } + + if (filename) + gk_fclose(fpout); +} + + +/*************************************************************************/ +/*! Returns a copy of a graph. + \param graph is the graph to be duplicated. + \returns the newly created copy of the graph. +*/ +/**************************************************************************/ +gk_graph_t *gk_graph_Dup(gk_graph_t *graph) +{ + gk_graph_t *ngraph; + + ngraph = gk_graph_Create(); + + ngraph->nvtxs = graph->nvtxs; + + /* copy the adjacency structure */ + if (graph->xadj) + ngraph->xadj = gk_zcopy(graph->nvtxs+1, graph->xadj, + gk_zmalloc(graph->nvtxs+1, "gk_graph_Dup: xadj")); + if (graph->ivwgts) + ngraph->ivwgts = gk_i32copy(graph->nvtxs, graph->ivwgts, + gk_i32malloc(graph->nvtxs, "gk_graph_Dup: ivwgts")); + if (graph->ivsizes) + ngraph->ivsizes = gk_i32copy(graph->nvtxs, graph->ivsizes, + gk_i32malloc(graph->nvtxs, "gk_graph_Dup: ivsizes")); + if (graph->vlabels) + ngraph->vlabels = gk_i32copy(graph->nvtxs, graph->vlabels, + gk_i32malloc(graph->nvtxs, "gk_graph_Dup: ivlabels")); + if (graph->fvwgts) + ngraph->fvwgts = gk_fcopy(graph->nvtxs, graph->fvwgts, + gk_fmalloc(graph->nvtxs, "gk_graph_Dup: fvwgts")); + if (graph->fvsizes) + ngraph->fvsizes = gk_fcopy(graph->nvtxs, graph->fvsizes, + gk_fmalloc(graph->nvtxs, "gk_graph_Dup: fvsizes")); + + + if (graph->adjncy) + ngraph->adjncy = gk_i32copy(graph->xadj[graph->nvtxs], graph->adjncy, + gk_i32malloc(graph->xadj[graph->nvtxs], "gk_graph_Dup: adjncy")); + if (graph->iadjwgt) + ngraph->iadjwgt = gk_i32copy(graph->xadj[graph->nvtxs], graph->iadjwgt, + gk_i32malloc(graph->xadj[graph->nvtxs], "gk_graph_Dup: iadjwgt")); + if (graph->fadjwgt) + ngraph->fadjwgt = gk_fcopy(graph->xadj[graph->nvtxs], 
graph->fadjwgt, + gk_fmalloc(graph->xadj[graph->nvtxs], "gk_graph_Dup: fadjwgt")); + + return ngraph; +} + + +/*************************************************************************/ +/*! Returns the transpose of a graph. + \param graph is the graph to be transposed. + \returns the newly created copy of the graph. +*/ +/**************************************************************************/ +gk_graph_t *gk_graph_Transpose(gk_graph_t *graph) +{ + int32_t vi, vj; + ssize_t ei; + + gk_graph_t *ngraph; + + ngraph = gk_graph_Create(); + + ngraph->nvtxs = graph->nvtxs; + ngraph->xadj = gk_zsmalloc(graph->nvtxs+1, 0, "gk_graph_Transpose: xadj"); + ngraph->adjncy = gk_i32malloc(graph->xadj[graph->nvtxs], "gk_graph_Transpose: adjncy"); + + if (graph->iadjwgt) + ngraph->iadjwgt = gk_i32malloc(graph->xadj[graph->nvtxs], "gk_graph_Transpose: iadjwgt"); + if (graph->fadjwgt) + ngraph->fadjwgt = gk_fmalloc(graph->xadj[graph->nvtxs], "gk_graph_Transpose: fadjwgt"); + + for (vi=0; vinvtxs; vi++) { + for (ei=graph->xadj[vi]; eixadj[vi+1]; ei++) + ngraph->xadj[graph->adjncy[ei]]++; + } + MAKECSR(vi, ngraph->nvtxs, ngraph->xadj); + + for (vi=0; vinvtxs; vi++) { + for (ei=graph->xadj[vi]; eixadj[vi+1]; ei++) { + vj = graph->adjncy[ei]; + ngraph->adjncy[ngraph->xadj[vj]] = vi; + if (ngraph->iadjwgt) + ngraph->iadjwgt[ngraph->xadj[vj]] = graph->iadjwgt[ei]; + if (ngraph->fadjwgt) + ngraph->fadjwgt[ngraph->xadj[vj]] = graph->fadjwgt[ei]; + ngraph->xadj[vj]++; + } + } + SHIFTCSR(vi, ngraph->nvtxs, ngraph->xadj); + + /* copy vertex attributes */ + if (graph->ivwgts) + ngraph->ivwgts = gk_i32copy(graph->nvtxs, graph->ivwgts, + gk_i32malloc(graph->nvtxs, "gk_graph_Transpose: ivwgts")); + if (graph->ivsizes) + ngraph->ivsizes = gk_i32copy(graph->nvtxs, graph->ivsizes, + gk_i32malloc(graph->nvtxs, "gk_graph_Transpose: ivsizes")); + if (graph->vlabels) + ngraph->vlabels = gk_i32copy(graph->nvtxs, graph->vlabels, + gk_i32malloc(graph->nvtxs, "gk_graph_Transpose: ivlabels")); + if 
(graph->fvwgts) + ngraph->fvwgts = gk_fcopy(graph->nvtxs, graph->fvwgts, + gk_fmalloc(graph->nvtxs, "gk_graph_Transpose: fvwgts")); + if (graph->fvsizes) + ngraph->fvsizes = gk_fcopy(graph->nvtxs, graph->fvsizes, + gk_fmalloc(graph->nvtxs, "gk_graph_Transpose: fvsizes")); + + + return ngraph; +} + + +/*************************************************************************/ +/*! Returns a subgraph containing a set of consecutive vertices. + \param graph is the original graph. + \param vstart is the starting vertex. + \param nvtxs is the number of vertices from vstart to extract. + \returns the newly created subgraph. +*/ +/**************************************************************************/ +gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs) +{ + ssize_t i; + gk_graph_t *ngraph; + + if (vstart+nvtxs > graph->nvtxs) + return NULL; + + ngraph = gk_graph_Create(); + + ngraph->nvtxs = nvtxs; + + /* copy the adjancy structure */ + if (graph->xadj) + ngraph->xadj = gk_zcopy(nvtxs+1, graph->xadj+vstart, + gk_zmalloc(nvtxs+1, "gk_graph_ExtractSubgraph: xadj")); + for (i=nvtxs; i>=0; i--) + ngraph->xadj[i] -= ngraph->xadj[0]; + ASSERT(ngraph->xadj[0] == 0); + + if (graph->ivwgts) + ngraph->ivwgts = gk_i32copy(nvtxs, graph->ivwgts+vstart, + gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivwgts")); + if (graph->ivsizes) + ngraph->ivsizes = gk_i32copy(nvtxs, graph->ivsizes+vstart, + gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivsizes")); + if (graph->vlabels) + ngraph->vlabels = gk_i32copy(nvtxs, graph->vlabels+vstart, + gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: vlabels")); + + if (graph->fvwgts) + ngraph->fvwgts = gk_fcopy(nvtxs, graph->fvwgts+vstart, + gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvwgts")); + if (graph->fvsizes) + ngraph->fvsizes = gk_fcopy(nvtxs, graph->fvsizes+vstart, + gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvsizes")); + + + ASSERT(ngraph->xadj[nvtxs] == graph->xadj[vstart+nvtxs]-graph->xadj[vstart]); + if 
(graph->adjncy) + ngraph->adjncy = gk_i32copy(graph->xadj[vstart+nvtxs]-graph->xadj[vstart], + graph->adjncy+graph->xadj[vstart], + gk_i32malloc(graph->xadj[vstart+nvtxs]-graph->xadj[vstart], + "gk_graph_ExtractSubgraph: adjncy")); + if (graph->iadjwgt) + ngraph->iadjwgt = gk_i32copy(graph->xadj[vstart+nvtxs]-graph->xadj[vstart], + graph->iadjwgt+graph->xadj[vstart], + gk_i32malloc(graph->xadj[vstart+nvtxs]-graph->xadj[vstart], + "gk_graph_ExtractSubgraph: iadjwgt")); + if (graph->fadjwgt) + ngraph->fadjwgt = gk_fcopy(graph->xadj[vstart+nvtxs]-graph->xadj[vstart], + graph->fadjwgt+graph->xadj[vstart], + gk_fmalloc(graph->xadj[vstart+nvtxs]-graph->xadj[vstart], + "gk_graph_ExtractSubgraph: fadjwgt")); + + return ngraph; +} + + +/*************************************************************************/ +/*! Returns a graph that has been reordered according to the permutation. + \param[IN] graph is the graph to be re-ordered. + \param[IN] perm is the new ordering of the graph's vertices + \param[IN] iperm is the original ordering of the re-ordered graph's vertices + \returns the newly created copy of the graph. + + \note Either perm or iperm can be NULL but not both. 
+*/ +/**************************************************************************/ +gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm) +{ + ssize_t j, jj, *xadj; + int i, k, u, v, nvtxs; + int freeperm=0, freeiperm=0; + int32_t *adjncy; + gk_graph_t *ngraph; + + if (perm == NULL && iperm == NULL) + return NULL; + + ngraph = gk_graph_Create(); + + ngraph->nvtxs = nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* allocate memory for the different structures that are present in graph */ + if (graph->xadj) + ngraph->xadj = gk_zmalloc(nvtxs+1, "gk_graph_Reorder: xadj"); + + if (graph->ivwgts) + ngraph->ivwgts = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivwgts"); + + if (graph->ivsizes) + ngraph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivsizes"); + + if (graph->vlabels) + ngraph->vlabels = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivlabels"); + + if (graph->fvwgts) + ngraph->fvwgts = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvwgts"); + + if (graph->fvsizes) + ngraph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvsizes"); + + + if (graph->adjncy) + ngraph->adjncy = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: adjncy"); + + if (graph->iadjwgt) + ngraph->iadjwgt = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: iadjwgt"); + + if (graph->fadjwgt) + ngraph->fadjwgt = gk_fmalloc(graph->xadj[nvtxs], "gk_graph_Reorder: fadjwgt"); + + + /* create perm/iperm if not provided */ + if (perm == NULL) { + freeperm = 1; + perm = gk_i32malloc(nvtxs, "gk_graph_Reorder: perm"); + for (i=0; ixadj[0] = jj = 0; + for (v=0; vadjncy[jj] = perm[adjncy[j]]; + if (graph->iadjwgt) + ngraph->iadjwgt[jj] = graph->iadjwgt[j]; + if (graph->fadjwgt) + ngraph->fadjwgt[jj] = graph->fadjwgt[j]; + } + if (graph->ivwgts) + ngraph->ivwgts[v] = graph->ivwgts[u]; + if (graph->fvwgts) + ngraph->fvwgts[v] = graph->fvwgts[u]; + if (graph->ivsizes) + ngraph->ivsizes[v] = graph->ivsizes[u]; + if (graph->fvsizes) + ngraph->fvsizes[v] = 
graph->fvsizes[u]; + if (graph->vlabels) + ngraph->vlabels[v] = graph->vlabels[u]; + + ngraph->xadj[v+1] = jj; + } + + + /* free memory */ + if (freeperm) + gk_free((void **)&perm, LTERM); + if (freeiperm) + gk_free((void **)&iperm, LTERM); + + return ngraph; +} + + +/*************************************************************************/ +/*! This function finds the connected components in a graph. + + \param graph is the graph structure + \param cptr is the ptr structure of the CSR representation of the + components. The length of this vector must be graph->nvtxs+1. + \param cind is the indices structure of the CSR representation of + the components. The length of this vector must be graph->nvtxs. + + \returns the number of components that it found. + + \note The cptr and cind parameters can be NULL, in which case only the + number of connected components is returned. +*/ +/*************************************************************************/ +int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind) +{ + ssize_t i, ii, j, jj, k, nvtxs, first, last, ntodo, ncmps; + ssize_t *xadj; + int32_t *adjncy, *pos, *todo; + int32_t mustfree_ccsr=0; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* Deal with NULL supplied cptr/cind vectors */ + if (cptr == NULL) { + cptr = gk_i32malloc(nvtxs+1, "gk_graph_FindComponents: cptr"); + cind = gk_i32malloc(nvtxs, "gk_graph_FindComponents: cind"); + mustfree_ccsr = 1; + } + + /* The list of vertices that have not been touched yet. + The valid entries are from [0..ntodo). */ + todo = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: todo")); + + /* For a vertex that has not been visited, pos[i] is the position in the + todo list that this vertex is stored. + If a vertex has been visited, pos[i] = -1. 
*/ + pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: pos")); + + + /* Find the connected componends */ + ncmps = -1; + ntodo = nvtxs; /* All vertices have not been visited */ + first = last = 0; /* Point to the first and last vertices that have been touched + but not explored. + These vertices are stored in cind[first]...cind[last-1]. */ + while (1) { + if (first == last) { /* Find another starting vertex */ + cptr[++ncmps] = first; /* Mark the end of the current CC */ + + if (ntodo > 0) { + /* put the first vertex in the todo list as the start of the new CC */ + GKASSERT(pos[todo[0]] != -1); + cind[last++] = todo[0]; + + pos[todo[0]] = -1; + todo[0] = todo[--ntodo]; + pos[todo[0]] = 0; + } + else { + break; + } + } + + i = cind[first++]; /* Get the first visited but unexplored vertex */ + + for (j=xadj[i]; jnvtxs <= 0) + return; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* This array will function like pos + touched of the CC method */ + pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: pos")); + + /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes. + Positions from [0...first) is the current iperm[] vector of the explored vertices; + Positions from [first...last) is the OPEN list (i.e., visited vertices); + Positions from [last...nvtxs) is the todo list. 
*/ + cot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: cot")); + + + /* put v at the front of the todo list */ + pos[0] = cot[0] = v; + pos[v] = cot[v] = 0; + + /* compute a BFS ordering from the seed vertex */ + first = last = 0; + while (first < nvtxs) { + if (first == last) { /* Find another starting vertex */ + k = cot[last]; + ASSERT(pos[k] != -1); + pos[k] = -1; /* mark node as being visited */ + last++; + } + + i = cot[first++]; /* the ++ advances the explored vertices */ + for (j=xadj[i]; jnvtxs <= 0) + return; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* the degree of the vertices in the closed list */ + degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees"); + + /* the minimum vertex ID of an open vertex to the closed list */ + minIDs = gk_i32smalloc(nvtxs, nvtxs+1, "gk_graph_ComputeBestFOrdering: minIDs"); + + /* the open list */ + open = gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: open"); + + /* if perm[i] >= 0, then perm[i] is the order of vertex i; + otherwise perm[i] == -1. + */ + perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm"); + + /* create the queue and put everything in it */ + queue = gk_i32pqCreate(nvtxs); + for (i=0; invtxs <= 0) + return; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* the degree of the vertices in the closed list */ + degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees"); + + /* the weighted degree of the vertices in the closed list for type==3 */ + wdegrees = gk_i64smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: wdegrees"); + + /* the sum of differences for type==4 */ + sod = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: sod"); + + /* the encountering level of a vertex type==5 */ + level = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: level"); + + /* The open+todo list of vertices. 
+ The vertices from [0..nopen] are the open vertices. + The vertices from [nopen..ntodo) are the todo vertices. + */ + ot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: ot")); + + /* For a vertex that has not been explored, pos[i] is the position in the ot list. */ + pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: pos")); + + /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1. */ + perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm"); + + /* create the queue and put the starting vertex in it */ + queue = gk_i32pqCreate(nvtxs); + gk_i32pqInsert(queue, v, 1); + + /* put v at the front of the open list */ + pos[0] = ot[0] = v; + pos[v] = ot[v] = 0; + nopen = 1; + ntodo = nvtxs; + + /* start processing the nodes */ + for (i=0; i= nopen) + gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen); + + /* remove v from the open list and re-arrange the todo part of the list */ + ot[pos[v]] = ot[nopen-1]; + pos[ot[nopen-1]] = pos[v]; + if (ntodo > nopen) { + ot[nopen-1] = ot[ntodo-1]; + pos[ot[ntodo-1]] = nopen-1; + } + nopen--; + ntodo--; + + for (j=xadj[v]; jnvtxs <= 0) + return; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + inqueue = gk_i32smalloc(nvtxs, 0, "gk_graph_SingleSourceShortestPaths: inqueue"); + + /* determine if you will be computing using int32_t or float and proceed from there */ + if (graph->iadjwgt != NULL) { + gk_i32pq_t *queue; + int32_t *adjwgt; + int32_t *sps; + + adjwgt = graph->iadjwgt; + + queue = gk_i32pqCreate(nvtxs); + gk_i32pqInsert(queue, v, 0); + inqueue[v] = 1; + + sps = gk_i32smalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps"); + sps[v] = 0; + + /* start processing the nodes */ + while ((v = gk_i32pqGetTop(queue)) != -1) { + inqueue[v] = 2; + + /* relax the adjacent edges */ + for (i=xadj[v]; ifadjwgt; + + queue = gk_fpqCreate(nvtxs); + gk_fpqInsert(queue, 
v, 0); + inqueue[v] = 1; + + sps = gk_fsmalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps"); + sps[v] = 0; + + /* start processing the nodes */ + while ((v = gk_fpqGetTop(queue)) != -1) { + inqueue[v] = 2; + + /* relax the adjacent edges */ + for (i=xadj[v]; invtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + iadjwgt = graph->iadjwgt; + fadjwgt = graph->fadjwgt; + + #pragma omp parallel if (nvtxs > 100) + { + ssize_t i, j, k; + gk_ikv_t *cand; + int32_t *itwgts=NULL; + float *ftwgts=NULL; + + #pragma omp single + for (i=0; i xadj[i] && adjncy[j] < adjncy[j-1]) + k = 1; /* an inversion */ + cand[j-xadj[i]].val = (int32_t)(j-xadj[i]); + cand[j-xadj[i]].key = adjncy[j]; + if (itwgts) + itwgts[j-xadj[i]] = iadjwgt[j]; + if (ftwgts) + ftwgts[j-xadj[i]] = fadjwgt[j]; + } + if (k) { + gk_ikvsorti(xadj[i+1]-xadj[i], cand); + for (j=xadj[i]; jiadjwgt != NULL || graph->fadjwgt != NULL); + + nrows = graph->nvtxs; + rowptr = graph->xadj; + rowind = graph->adjncy; + if (hasvals) { + irowval = graph->iadjwgt; + rowval = graph->fadjwgt; + } + + /* create the column view for efficient processing */ + colptr = gk_zsmalloc(nrows+1, 0, "colptr"); + colind = gk_i32malloc(rowptr[nrows], "colind"); + if (hasvals) { + if (rowval) + colval = gk_fmalloc(rowptr[nrows], "colval"); + if (irowval) + icolval = gk_i32malloc(rowptr[nrows], "icolval"); + } + + for (i=0; invtxs = graph->nvtxs; + + nrowptr = ngraph->xadj = gk_zmalloc(nrows+1, "gk_csr_MakeSymmetric: nrowptr"); + nrowind = ngraph->adjncy = gk_imalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowind"); + if (hasvals) { + if (rowval) + nrowval = graph->fadjwgt = gk_fmalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval"); + if (irowval) + nirowval = graph->iadjwgt = gk_i32malloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval"); + } + + marker = gk_ismalloc(nrows, -1, "marker"); + ids = gk_imalloc(nrows, "ids"); + if (hasvals) { + if (rowval) + wgts = gk_fmalloc(nrows, "wgts"); + if (irowval) + iwgts = gk_i32malloc(nrows, 
"wgts"); + } + + nrowptr[0] = nnz = 0; + for (i=0; inrows = nrows; + ngraph->ncols = graph->ncols; + + for (nnz=0, i=0; irowptr[rind[i]+1]-graph->rowptr[rind[i]]; + + ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr"); + ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind"); + ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval"); + + ngraph->rowptr[0] = 0; + for (nnz=0, j=0, ii=0; iirowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz); + gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz); + nnz += graph->rowptr[i+1]-graph->rowptr[i]; + ngraph->rowptr[++j] = nnz; + } + ASSERT(j == ngraph->nrows); + + return ngraph; +} + + +/*************************************************************************/ +/*! Returns a subgraphrix corresponding to a specified partitioning of rows. + \param graph is the original graphrix. + \param part is the partitioning vector of the rows. + \param pid is the partition ID that will be extracted. + \returns the row structure of the newly created subgraphrix. 
+*/ +/**************************************************************************/ +gk_graph_t *gk_graph_ExtractPartition(gk_graph_t *graph, int *part, int pid) +{ + ssize_t i, j, nnz; + gk_graph_t *ngraph; + + ngraph = gk_graph_Create(); + + ngraph->nrows = 0; + ngraph->ncols = graph->ncols; + + for (nnz=0, i=0; inrows; i++) { + if (part[i] == pid) { + ngraph->nrows++; + nnz += graph->rowptr[i+1]-graph->rowptr[i]; + } + } + + ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr"); + ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind"); + ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval"); + + ngraph->rowptr[0] = 0; + for (nnz=0, j=0, i=0; inrows; i++) { + if (part[i] == pid) { + gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz); + gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz); + nnz += graph->rowptr[i+1]-graph->rowptr[i]; + ngraph->rowptr[++j] = nnz; + } + } + ASSERT(j == ngraph->nrows); + + return ngraph; +} + + +/*************************************************************************/ +/*! Splits the graphrix into multiple sub-graphrices based on the provided + color array. + \param graph is the original graphrix. + \param color is an array of size equal to the number of non-zeros + in the graphrix (row-wise structure). The graphrix is split into + as many parts as the number of colors. For meaningfull results, + the colors should be numbered consecutively starting from 0. + \returns an array of graphrices for each supplied color number. 
+*/ +/**************************************************************************/ +gk_graph_t **gk_graph_Split(gk_graph_t *graph, int *color) +{ + ssize_t i, j; + int nrows, ncolors; + ssize_t *rowptr; + int *rowind; + float *rowval; + gk_graph_t **sgraphs; + + nrows = graph->nrows; + rowptr = graph->rowptr; + rowind = graph->rowind; + rowval = graph->rowval; + + ncolors = gk_imax(rowptr[nrows], color)+1; + + sgraphs = (gk_graph_t **)gk_malloc(sizeof(gk_graph_t *)*ncolors, "gk_graph_Split: sgraphs"); + for (i=0; inrows = graph->nrows; + sgraphs[i]->ncols = graph->ncols; + sgraphs[i]->rowptr = gk_zsmalloc(nrows+1, 0, "gk_graph_Split: sgraphs[i]->rowptr"); + } + + for (i=0; irowptr[i]++; + } + for (i=0; irowptr); + + for (i=0; irowind = gk_imalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowind"); + sgraphs[i]->rowval = gk_fmalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowval"); + } + + for (i=0; irowind[sgraphs[color[j]]->rowptr[i]] = rowind[j]; + sgraphs[color[j]]->rowval[sgraphs[color[j]]->rowptr[i]] = rowval[j]; + sgraphs[color[j]]->rowptr[i]++; + } + } + + for (i=0; irowptr); + + return sgraphs; +} + + +/*************************************************************************/ +/*! Prunes certain rows/columns of the graphrix. The prunning takes place + by analyzing the row structure of the graphrix. The prunning takes place + by removing rows/columns but it does not affect the numbering of the + remaining rows/columns. + + \param graph the graphrix to be prunned, + \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) + of the graphrix will be prunned, + \param minf is the minimum number of rows (columns) that a column (row) must + be present in order to be kept, + \param maxf is the maximum number of rows (columns) that a column (row) must + be present at in order to be kept. + \returns the prunned graphrix consisting only of its row-based structure. + The input graphrix is not modified. 
+*/ +/**************************************************************************/ +gk_graph_t *gk_graph_Prune(gk_graph_t *graph, int what, int minf, int maxf) +{ + ssize_t i, j, nnz; + int nrows, ncols; + ssize_t *rowptr, *nrowptr; + int *rowind, *nrowind, *collen; + float *rowval, *nrowval; + gk_graph_t *ngraph; + + ngraph = gk_graph_Create(); + + nrows = ngraph->nrows = graph->nrows; + ncols = ngraph->ncols = graph->ncols; + + rowptr = graph->rowptr; + rowind = graph->rowind; + rowval = graph->rowval; + + nrowptr = ngraph->rowptr = gk_zmalloc(nrows+1, "gk_graph_Prune: nrowptr"); + nrowind = ngraph->rowind = gk_imalloc(rowptr[nrows], "gk_graph_Prune: nrowind"); + nrowval = ngraph->rowval = gk_fmalloc(rowptr[nrows], "gk_graph_Prune: nrowval"); + + + switch (what) { + case GK_CSR_COL: + collen = gk_ismalloc(ncols, 0, "gk_graph_Prune: collen"); + + for (i=0; i= minf && collen[i] <= maxf ? 1 : 0); + + nrowptr[0] = 0; + for (nnz=0, i=0; i= minf && rowptr[i+1]-rowptr[i] <= maxf) { + for (j=rowptr[i]; jrowval) { + n = graph->nrows; + ptr = graph->rowptr; + val = graph->rowval; + + #pragma omp parallel if (ptr[n] > OMPMINOPS) + { + #pragma omp for private(j,sum) schedule(static) + for (i=0; i 0 */ + } + if (sum > 0) { + if (norm == 2) + sum=1.0/sqrt(sum); + else if (norm == 1) + sum=1.0/sum; + for (j=ptr[i]; jcolval) { + n = graph->ncols; + ptr = graph->colptr; + val = graph->colval; + + #pragma omp parallel if (ptr[n] > OMPMINOPS) + { + #pragma omp for private(j,sum) schedule(static) + for (i=0; i 0) { + if (norm == 2) + sum=1.0/sqrt(sum); + else if (norm == 1) + sum=1.0/sum; + for (j=ptr[i]; j + +/****************************************************************************** +* This function creates the hash-table +*******************************************************************************/ +gk_HTable_t *HTable_Create(int nelements) +{ + gk_HTable_t *htable; + + htable = gk_malloc(sizeof(gk_HTable_t), "HTable_Create: htable"); + htable->harray = 
gk_ikvmalloc(nelements, "HTable_Create: harray"); + htable->nelements = nelements; + + HTable_Reset(htable); + + return htable; +} + + +/****************************************************************************** +* This function resets the data-structures associated with the hash-table +*******************************************************************************/ +void HTable_Reset(gk_HTable_t *htable) +{ + int i; + + for (i=0; inelements; i++) + htable->harray[i].key = HTABLE_EMPTY; + htable->htsize = 0; + +} + +/****************************************************************************** +* This function resizes the hash-table +*******************************************************************************/ +void HTable_Resize(gk_HTable_t *htable, int nelements) +{ + int i, old_nelements; + gk_ikv_t *old_harray; + + old_nelements = htable->nelements; + old_harray = htable->harray; + + /* prepare larger hash */ + htable->nelements = nelements; + htable->htsize = 0; + htable->harray = gk_ikvmalloc(nelements, "HTable_Resize: harray"); + for (i=0; iharray[i].key = HTABLE_EMPTY; + + /* reassign the values */ + for (i=0; ihtsize > htable->nelements/2) + HTable_Resize(htable, 2*htable->nelements); + + first = HTable_HFunction(htable->nelements, key); + + for (i=first; inelements; i++) { + if (htable->harray[i].key == HTABLE_EMPTY || htable->harray[i].key == HTABLE_DELETED) { + htable->harray[i].key = key; + htable->harray[i].val = val; + htable->htsize++; + return; + } + } + + for (i=0; iharray[i].key == HTABLE_EMPTY || htable->harray[i].key == HTABLE_DELETED) { + htable->harray[i].key = key; + htable->harray[i].val = val; + htable->htsize++; + return; + } + } + +} + + +/****************************************************************************** +* This function deletes key from the htable +*******************************************************************************/ +void HTable_Delete(gk_HTable_t *htable, int key) +{ + int i, first; + + first = 
HTable_HFunction(htable->nelements, key); + + for (i=first; inelements; i++) { + if (htable->harray[i].key == key) { + htable->harray[i].key = HTABLE_DELETED; + htable->htsize--; + return; + } + } + + for (i=0; iharray[i].key == key) { + htable->harray[i].key = HTABLE_DELETED; + htable->htsize--; + return; + } + } + +} + + +/****************************************************************************** +* This function returns the data associated with the key in the hastable +*******************************************************************************/ +int HTable_Search(gk_HTable_t *htable, int key) +{ + int i, first; + + first = HTable_HFunction(htable->nelements, key); + + for (i=first; inelements; i++) { + if (htable->harray[i].key == key) + return htable->harray[i].val; + else if (htable->harray[i].key == HTABLE_EMPTY) + return -1; + } + + for (i=0; iharray[i].key == key) + return htable->harray[i].val; + else if (htable->harray[i].key == HTABLE_EMPTY) + return -1; + } + + return -1; +} + + +/****************************************************************************** +* This function returns the next key/val +*******************************************************************************/ +int HTable_GetNext(gk_HTable_t *htable, int key, int *r_val, int type) +{ + int i; + static int first, last; + + if (type == HTABLE_FIRST) + first = last = HTable_HFunction(htable->nelements, key); + + if (first > last) { + for (i=first; inelements; i++) { + if (htable->harray[i].key == key) { + *r_val = htable->harray[i].val; + first = i+1; + return 1; + } + else if (htable->harray[i].key == HTABLE_EMPTY) + return -1; + } + first = 0; + } + + for (i=first; iharray[i].key == key) { + *r_val = htable->harray[i].val; + first = i+1; + return 1; + } + else if (htable->harray[i].key == HTABLE_EMPTY) + return -1; + } + + return -1; +} + + +/****************************************************************************** +* This function returns the data associated with the key 
in the hastable +*******************************************************************************/ +int HTable_SearchAndDelete(gk_HTable_t *htable, int key) +{ + int i, first; + + first = HTable_HFunction(htable->nelements, key); + + for (i=first; inelements; i++) { + if (htable->harray[i].key == key) { + htable->harray[i].key = HTABLE_DELETED; + htable->htsize--; + return htable->harray[i].val; + } + else if (htable->harray[i].key == HTABLE_EMPTY) + gk_errexit(SIGERR, "HTable_SearchAndDelete: Failed to find the key!\n"); + } + + for (i=0; iharray[i].key == key) { + htable->harray[i].key = HTABLE_DELETED; + htable->htsize--; + return htable->harray[i].val; + } + else if (htable->harray[i].key == HTABLE_EMPTY) + gk_errexit(SIGERR, "HTable_SearchAndDelete: Failed to find the key!\n"); + } + + return -1; + +} + + + +/****************************************************************************** +* This function destroys the data structures associated with the hash-table +*******************************************************************************/ +void HTable_Destroy(gk_HTable_t *htable) +{ + gk_free((void **)&htable->harray, &htable, LTERM); +} + + +/****************************************************************************** +* This is the hash-function. Based on multiplication +*******************************************************************************/ +int HTable_HFunction(int nelements, int key) +{ + return (int)(key%nelements); +} diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/io.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/io.c new file mode 100644 index 00000000..289b4016 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/io.c @@ -0,0 +1,681 @@ +/*! +\file io.c +\brief Various file I/O functions. + +This file contains various functions that perform I/O. 
+ +\date Started 4/10/95 +\author George +\version\verbatim $Id: io.c 18951 2015-08-08 20:10:46Z karypis $ \endverbatim +*/ + +#ifdef HAVE_GETLINE +/* Get getline to be defined. */ +#define _GNU_SOURCE +#include +#undef _GNU_SOURCE +#endif + +#include + +/************************************************************************* +* This function opens a file +**************************************************************************/ +FILE *gk_fopen(char *fname, char *mode, const char *msg) +{ + FILE *fp; + char errmsg[8192]; + + fp = fopen(fname, mode); + if (fp != NULL) + return fp; + + sprintf(errmsg,"file: %s, mode: %s, [%s]", fname, mode, msg); + perror(errmsg); + errexit("Failed on gk_fopen()\n"); + + return NULL; +} + + +/************************************************************************* +* This function closes a file +**************************************************************************/ +void gk_fclose(FILE *fp) +{ + fclose(fp); +} + + +/*************************************************************************/ +/*! This function is a wrapper around the read() function that ensures + that all data is been read, by issuing multiple read requests. + The only time when not 'count' items are read is when the EOF has been + reached. +*/ +/*************************************************************************/ +ssize_t gk_read(int fd, void *vbuf, size_t count) +{ + char *buf = (char *)vbuf; + ssize_t rsize, tsize=count; + + do { + if ((rsize = read(fd, buf, tsize)) == -1) + return -1; + buf += rsize; + tsize -= rsize; + } while (tsize > 0 && rsize > 0); + + return count-tsize; +} + + +/*************************************************************************/ +/*! This function is a wrapper around the write() function that ensures + that all data is been written, by issueing multiple write requests. 
/*************************************************************************/
/*! This function is a wrapper around the write() function that ensures
    that all data has been written, by issuing multiple write requests.
    \param fd is the file descriptor to write to.
    \param vbuf is the buffer holding the data.
    \param count is the number of bytes to write.
    \returns the number of bytes written, or -1 as soon as any write()
             call returns -1. The result is smaller than count only when
             a write() call made no progress (returned 0).
*/
/*************************************************************************/
ssize_t gk_write(int fd, void *vbuf, size_t count)
{
  char *buf = (char *)vbuf;
  ssize_t size, tsize=count;   /* tsize: bytes still to be written */

  do {
    if ((size = write(fd, buf, tsize)) == -1)
      return -1;
    if (size == 0)
      break;   /* no progress: stop instead of retrying forever */
    buf   += size;
    tsize -= size;
  } while (tsize > 0);

  return (ssize_t)count-tsize;
}


/*************************************************************************/
/*! This function is the GKlib implementation of glibc's getline()
    function. When HAVE_GETLINE is defined it simply forwards to getline().
    \param lineptr points to the line buffer. If *lineptr is NULL or *n is
           0, a 1024-byte buffer is allocated with gk_malloc(); the buffer
           is grown with gk_realloc() as needed.
    \param n points to the current size of *lineptr and is updated whenever
           the buffer is (re)allocated.
    \param stream is the stream to read from.
    \returns -1 if the EOF has been reached, otherwise it returns the
             number of bytes read (including the '\n', if one was read).
*/
/*************************************************************************/
ssize_t gk_getline(char **lineptr, size_t *n, FILE *stream)
{
#ifdef HAVE_GETLINE
  return getline(lineptr, n, stream);
#else
  size_t i;
  int ch;

  if (feof(stream))
    return -1;

  /* Initial memory allocation if *lineptr is NULL */
  if (*lineptr == NULL || *n == 0) {
    *n = 1024;
    *lineptr = gk_malloc((*n)*sizeof(char), "gk_getline: lineptr");
  }

  /* get into the main loop */
  i = 0;
  while ((ch = getc(stream)) != EOF) {
    (*lineptr)[i++] = (char)ch;

    /* reallocate memory once the buffer is full. The +1 is for '\0'.
       '>=' (not '==') so that a caller-supplied buffer with *n < 2 is
       grown as well instead of being overrun. */
    if (i+1 >= *n) {
      *n = 2*(*n);
      *lineptr = gk_realloc(*lineptr, (*n)*sizeof(char), "gk_getline: lineptr");
    }

    if (ch == '\n')
      break;
  }
  (*lineptr)[i] = '\0';

  return (i == 0 ? -1 : (ssize_t)i);
#endif
}
+*/ +/*************************************************************************/ +char **gk_readfile(char *fname, size_t *r_nlines) +{ + size_t lnlen, nlines=0; + char *line=NULL, **lines=NULL; + FILE *fpin; + + gk_getfilestats(fname, &nlines, NULL, NULL, NULL); + if (nlines > 0) { + lines = (char **)gk_malloc(nlines*sizeof(char *), "gk_readfile: lines"); + + fpin = gk_fopen(fname, "r", "gk_readfile"); + nlines = 0; + while (gk_getline(&line, &lnlen, fpin) != -1) { + gk_strtprune(line, "\n\r"); + lines[nlines++] = gk_strdup(line); + } + gk_fclose(fpin); + } + + gk_free((void **)&line, LTERM); + + if (r_nlines != NULL) + *r_nlines = nlines; + + return lines; +} + + +/*************************************************************************/ +/*! This function reads the contents of a file and returns it in the + form of an array of int32_t. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. +*/ +/*************************************************************************/ +int32_t *gk_i32readfile(char *fname, size_t *r_nlines) +{ + size_t lnlen, nlines=0; + char *line=NULL; + int32_t *array=NULL; + FILE *fpin; + + gk_getfilestats(fname, &nlines, NULL, NULL, NULL); + if (nlines > 0) { + array = gk_i32malloc(nlines, "gk_i32readfile: array"); + + fpin = gk_fopen(fname, "r", "gk_readfile"); + nlines = 0; + + while (gk_getline(&line, &lnlen, fpin) != -1) { + sscanf(line, "%"SCNd32, &array[nlines++]); + } + + gk_fclose(fpin); + } + + gk_free((void **)&line, LTERM); + + if (r_nlines != NULL) + *r_nlines = nlines; + + return array; +} + +/*************************************************************************/ +/*! This function reads the contents of a file and returns it in the + form of an array of int64_t. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. 
+*/ +/*************************************************************************/ +int64_t *gk_i64readfile(char *fname, size_t *r_nlines) +{ + size_t lnlen, nlines=0; + char *line=NULL; + int64_t *array=NULL; + FILE *fpin; + + gk_getfilestats(fname, &nlines, NULL, NULL, NULL); + if (nlines > 0) { + array = gk_i64malloc(nlines, "gk_i64readfile: array"); + + fpin = gk_fopen(fname, "r", "gk_readfile"); + nlines = 0; + + while (gk_getline(&line, &lnlen, fpin) != -1) { + sscanf(line, "%"SCNd64, &array[nlines++]); + } + + gk_fclose(fpin); + } + + gk_free((void **)&line, LTERM); + + if (r_nlines != NULL) + *r_nlines = nlines; + + return array; +} + +/*************************************************************************/ +/*! This function reads the contents of a file and returns it in the + form of an array of ssize_t. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. +*/ +/*************************************************************************/ +ssize_t *gk_zreadfile(char *fname, size_t *r_nlines) +{ + size_t lnlen, nlines=0; + char *line=NULL; + ssize_t *array=NULL; + FILE *fpin; + + gk_getfilestats(fname, &nlines, NULL, NULL, NULL); + if (nlines > 0) { + array = gk_zmalloc(nlines, "gk_zreadfile: array"); + + fpin = gk_fopen(fname, "r", "gk_readfile"); + nlines = 0; + + while (gk_getline(&line, &lnlen, fpin) != -1) { + sscanf(line, "%zd", &array[nlines++]); + } + + gk_fclose(fpin); + } + + gk_free((void **)&line, LTERM); + + if (r_nlines != NULL) + *r_nlines = nlines; + + return array; +} + +/*************************************************************************/ +/*! This function reads the contents of a binary file and returns it in the + form of an array of char. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. 
+*/ +/*************************************************************************/ +char *gk_creadfilebin(char *fname, size_t *r_nelmnts) +{ + size_t nelmnts; + ssize_t fsize; + char *array=NULL; + FILE *fpin; + + *r_nelmnts = 0; + + fsize = gk_getfsize(fname); + + if (fsize == -1) { + gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname); + return NULL; + } + + nelmnts = fsize; + array = gk_cmalloc(nelmnts, "gk_creadfilebin: array"); + + fpin = gk_fopen(fname, "rb", "gk_creadfilebin"); + if (fread(array, sizeof(char), nelmnts, fpin) != nelmnts) { + gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts); + gk_free((void **)&array, LTERM); + return NULL; + } + gk_fclose(fpin); + + *r_nelmnts = nelmnts; + + return array; +} + +/*************************************************************************/ +/*! This function writes the contents of an array into a binary file. + \param fname is the name of the file + \param n the number of elements in the array. + \param a the array to be written out. +*/ +/*************************************************************************/ +size_t gk_cwritefilebin(char *fname, size_t n, char *a) +{ + size_t fsize; + FILE *fp; + + fp = gk_fopen(fname, "wb", "gk_writefilebin"); + + fsize = fwrite(a, sizeof(char), n, fp); + + gk_fclose(fp); + + return fsize; +} + +/*************************************************************************/ +/*! This function reads the contents of a binary file and returns it in the + form of an array of int32_t. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. 
+*/ +/*************************************************************************/ +int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts) +{ + size_t nelmnts; + ssize_t fsize; + int32_t *array=NULL; + FILE *fpin; + + *r_nelmnts = 0; + + fsize = gk_getfsize(fname); + + if (fsize == -1) { + gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname); + return NULL; + } + + if (fsize%sizeof(int32_t) != 0) { + gk_errexit(SIGERR, "The size [%zd] of the file [%s] is not in multiples of sizeof(int32_t).\n", fsize, fname); + return NULL; + } + + nelmnts = fsize/sizeof(int32_t); + array = gk_i32malloc(nelmnts, "gk_i32readfilebin: array"); + + fpin = gk_fopen(fname, "rb", "gk_i32readfilebin"); + + if (fread(array, sizeof(int32_t), nelmnts, fpin) != nelmnts) { + gk_errexit(SIGERR, "Failed to read the number of words requested. %zd\n", nelmnts); + gk_free((void **)&array, LTERM); + return NULL; + } + gk_fclose(fpin); + + *r_nelmnts = nelmnts; + + return array; +} + +/*************************************************************************/ +/*! This function writes the contents of an array into a binary file. + \param fname is the name of the file + \param n the number of elements in the array. + \param a the array to be written out. +*/ +/*************************************************************************/ +size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a) +{ + size_t fsize; + FILE *fp; + + fp = gk_fopen(fname, "wb", "gk_writefilebin"); + + fsize = fwrite(a, sizeof(int32_t), n, fp); + + gk_fclose(fp); + + return fsize; +} + +/*************************************************************************/ +/*! This function reads the contents of a binary file and returns it in the + form of an array of int64_t. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. 
+*/ +/*************************************************************************/ +int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts) +{ + size_t nelmnts; + ssize_t fsize; + int64_t *array=NULL; + FILE *fpin; + + *r_nelmnts = 0; + + fsize = gk_getfsize(fname); + + if (fsize == -1) { + gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname); + return NULL; + } + + if (fsize%sizeof(int64_t) != 0) { + gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(int64_t).\n"); + return NULL; + } + + nelmnts = fsize/sizeof(int64_t); + array = gk_i64malloc(nelmnts, "gk_i64readfilebin: array"); + + fpin = gk_fopen(fname, "rb", "gk_i64readfilebin"); + + if (fread(array, sizeof(int64_t), nelmnts, fpin) != nelmnts) { + gk_errexit(SIGERR, "Failed to read the number of words requested. %zd\n", nelmnts); + gk_free((void **)&array, LTERM); + return NULL; + } + gk_fclose(fpin); + + *r_nelmnts = nelmnts; + + return array; +} + +/*************************************************************************/ +/*! This function writes the contents of an array into a binary file. + \param fname is the name of the file + \param n the number of elements in the array. + \param a the array to be written out. +*/ +/*************************************************************************/ +size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a) +{ + size_t fsize; + FILE *fp; + + fp = gk_fopen(fname, "wb", "gk_writefilebin"); + + fsize = fwrite(a, sizeof(int64_t), n, fp); + + gk_fclose(fp); + + return fsize; +} + +/*************************************************************************/ +/*! This function reads the contents of a binary file and returns it in the + form of an array of ssize_t. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. 
+*/ +/*************************************************************************/ +ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts) +{ + size_t nelmnts; + ssize_t fsize; + ssize_t *array=NULL; + FILE *fpin; + + *r_nelmnts = 0; + + fsize = gk_getfsize(fname); + + if (fsize == -1) { + gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname); + return NULL; + } + + if (fsize%sizeof(ssize_t) != 0) { + gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(ssize_t).\n"); + return NULL; + } + + nelmnts = fsize/sizeof(ssize_t); + array = gk_zmalloc(nelmnts, "gk_zreadfilebin: array"); + + fpin = gk_fopen(fname, "rb", "gk_zreadfilebin"); + + if (fread(array, sizeof(ssize_t), nelmnts, fpin) != nelmnts) { + gk_errexit(SIGERR, "Failed to read the number of words requested. %zd\n", nelmnts); + gk_free((void **)&array, LTERM); + return NULL; + } + gk_fclose(fpin); + + *r_nelmnts = nelmnts; + + return array; +} + +/*************************************************************************/ +/*! This function writes the contents of an array into a binary file. + \param fname is the name of the file + \param n the number of elements in the array. + \param a the array to be written out. +*/ +/*************************************************************************/ +size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a) +{ + size_t fsize; + FILE *fp; + + fp = gk_fopen(fname, "wb", "gk_writefilebin"); + + fsize = fwrite(a, sizeof(ssize_t), n, fp); + + gk_fclose(fp); + + return fsize; +} + +/*************************************************************************/ +/*! This function reads the contents of a binary file and returns it in the + form of an array of float. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. 
+*/ +/*************************************************************************/ +float *gk_freadfilebin(char *fname, size_t *r_nelmnts) +{ + size_t nelmnts; + ssize_t fsize; + float *array=NULL; + FILE *fpin; + + *r_nelmnts = 0; + + fsize = gk_getfsize(fname); + + if (fsize == -1) { + gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname); + return NULL; + } + + if (fsize%sizeof(float) != 0) { + gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(float).\n"); + return NULL; + } + + nelmnts = fsize/sizeof(float); + array = gk_fmalloc(nelmnts, "gk_freadfilebin: array"); + + fpin = gk_fopen(fname, "rb", "gk_freadfilebin"); + + if (fread(array, sizeof(float), nelmnts, fpin) != nelmnts) { + gk_errexit(SIGERR, "Failed to read the number of words requested. %zd\n", nelmnts); + gk_free((void **)&array, LTERM); + return NULL; + } + gk_fclose(fpin); + + *r_nelmnts = nelmnts; + + return array; +} + +/*************************************************************************/ +/*! This function writes the contents of an array into a binary file. + \param fname is the name of the file + \param n the number of elements in the array. + \param a the array to be written out. +*/ +/*************************************************************************/ +size_t gk_fwritefilebin(char *fname, size_t n, float *a) +{ + size_t fsize; + FILE *fp; + + fp = gk_fopen(fname, "wb", "gk_fwritefilebin"); + + fsize = fwrite(a, sizeof(float), n, fp); + + gk_fclose(fp); + + return fsize; +} + +/*************************************************************************/ +/*! This function reads the contents of a binary file and returns it in the + form of an array of double. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. 
+*/ +/*************************************************************************/ +double *gk_dreadfilebin(char *fname, size_t *r_nelmnts) +{ + size_t nelmnts; + ssize_t fsize; + double *array=NULL; + FILE *fpin; + + *r_nelmnts = 0; + + fsize = gk_getfsize(fname); + + if (fsize == -1) { + gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname); + return NULL; + } + + if (fsize%sizeof(double) != 0) { + gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(double).\n"); + return NULL; + } + + nelmnts = fsize/sizeof(double); + array = gk_dmalloc(nelmnts, "gk_dreadfilebin: array"); + + fpin = gk_fopen(fname, "rb", "gk_dreadfilebin"); + + if (fread(array, sizeof(double), nelmnts, fpin) != nelmnts) { + gk_errexit(SIGERR, "Failed to read the number of words requested. %zd\n", nelmnts); + gk_free((void **)&array, LTERM); + return NULL; + } + gk_fclose(fpin); + + *r_nelmnts = nelmnts; + + return array; +} + +/*************************************************************************/ +/*! This function writes the contents of an array into a binary file. + \param fname is the name of the file + \param n the number of elements in the array. + \param a the array to be written out. +*/ +/*************************************************************************/ +size_t gk_dwritefilebin(char *fname, size_t n, double *a) +{ + size_t fsize; + FILE *fp; + + fp = gk_fopen(fname, "wb", "gk_writefilebin"); + + fsize = fwrite(a, sizeof(double), n, fp); + + gk_fclose(fp); + + return fsize; +} + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/itemsets.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/itemsets.c new file mode 100644 index 00000000..beb58aea --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/itemsets.c @@ -0,0 +1,210 @@ +/*! + * \file + * \brief Frequent/Closed itemset discovery routines + * + * This file contains the code for finding frequent/closed itemests. 
These routines + * are implemented using a call-back mechanism to deal with the discovered itemsets. + * + * \date 6/13/2008 + * \author George Karypis + * \version\verbatim $Id: itemsets.c 19240 2015-10-22 12:41:19Z karypis $ \endverbatim + */ + +#include <GKlib.h> + +/*-------------------------------------------------------------*/ +/*! Data structures for use within this module */ +/*-------------------------------------------------------------*/ +typedef struct { + int minfreq; /* the minimum frequency of a pattern */ + int maxfreq; /* the maximum frequency of a pattern */ + int minlen; /* the minimum length of the requested pattern */ + int maxlen; /* the maximum length of the requested pattern */ + int tnitems; /* the initial range of the item space */ + + /* the call-back function */ + void (*callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids); + void *stateptr; /* the user-supplied pointer to pass to the callback */ + + /* workspace variables */ + int *rmarker; + gk_ikv_t *cand; +} isparams_t; + + +/*-------------------------------------------------------------*/ +/*! Prototypes for this module */ +/*-------------------------------------------------------------*/ +void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, + int preflen, int *prefix); +gk_csr_t *itemsets_project_matrix(isparams_t *param, gk_csr_t *mat, int cid); + + + +/*************************************************************************/ +/*! 
The entry point of the frequent itemset discovery code */ +/*************************************************************************/ +void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind, + int minfreq, int maxfreq, int minlen, int maxlen, + void (*process_itemset)(void *stateptr, int nitems, int *itemids, + int ntrans, int *transids), + void *stateptr) +{ + ssize_t i; + gk_csr_t *mat, *pmat; + isparams_t params; + int *pattern; + + /* Create the matrix */ + mat = gk_csr_Create(); + mat->nrows = ntrans; + mat->ncols = tranind[gk_iargmax(tranptr[ntrans], tranind, 1)]+1; + mat->rowptr = gk_zcopy(ntrans+1, tranptr, gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr")); + mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind")); + mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids")); + + /* Setup the parameters */ + params.minfreq = minfreq; + params.maxfreq = (maxfreq == -1 ? mat->nrows : maxfreq); + params.minlen = minlen; + params.maxlen = (maxlen == -1 ? mat->ncols : maxlen); + params.tnitems = mat->ncols; + params.callback = process_itemset; + params.stateptr = stateptr; + params.rmarker = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker"); + params.cand = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand"); + + /* Perform the initial projection */ + gk_csr_CreateIndex(mat, GK_CSR_COL); + pmat = itemsets_project_matrix(&params, mat, -1); + gk_csr_Free(&mat); + + pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern"); + itemsets_find_frequent_itemsets(&params, pmat, 0, pattern); + + gk_csr_Free(&pmat); + gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM); + +} + + + +/*************************************************************************/ +/*! 
The recursive routine for DFS-based frequent pattern discovery */ +/*************************************************************************/ +void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, + int preflen, int *prefix) +{ + ssize_t i; + gk_csr_t *cmat; + + /* Project each frequent column */ + for (i=0; i<mat->ncols; i++) { + prefix[preflen] = mat->colids[i]; + + if (preflen+1 >= params->minlen) + (*params->callback)(params->stateptr, preflen+1, prefix, + mat->colptr[i+1]-mat->colptr[i], mat->colind+mat->colptr[i]); + + if (preflen+1 < params->maxlen) { + cmat = itemsets_project_matrix(params, mat, i); + itemsets_find_frequent_itemsets(params, cmat, preflen+1, prefix); + gk_csr_Free(&cmat); + } + } + +} + + +/******************************************************************************/ +/*! This function projects a matrix w.r.t. to a particular column. + It performs the following steps: + - Determines the length of each column that is remaining. + - Sorts the columns in increasing length. + - Creates a column-based version of the matrix with the proper + column ordering. + */ +/*******************************************************************************/ +gk_csr_t *itemsets_project_matrix(isparams_t *params, gk_csr_t *mat, int cid) +{ + ssize_t i, j, k, ii, pnnz; + int nrows, ncols, pnrows, pncols; + ssize_t *colptr, *pcolptr; + int *colind, *colids, *pcolind, *pcolids, *rmarker; + gk_csr_t *pmat; + gk_ikv_t *cand; + + nrows = mat->nrows; + ncols = mat->ncols; + colptr = mat->colptr; + colind = mat->colind; + colids = mat->colids; + + rmarker = params->rmarker; + cand = params->cand; + + + /* Allocate space for the projected matrix based on what you know thus far */ + pmat = gk_csr_Create(); + pmat->nrows = pnrows = (cid == -1 ? 
nrows : colptr[cid+1]-colptr[cid]); + + + /* Mark the rows that will be kept and determine the prowids */ + if (cid == -1) { /* Initial projection */ + gk_iset(nrows, 1, rmarker); + } + else { /* The other projections */ + for (i=colptr[cid]; i= params->minfreq && k <= params->maxfreq) { + cand[pncols].val = i; + cand[pncols++].key = k; + pnnz += k; + } + } + + /* Sort the columns in increasing order */ + gk_ikvsorti(pncols, cand); + + + /* Allocate space for the remaining fields of the projected matrix */ + pmat->ncols = pncols; + pmat->colids = pcolids = gk_imalloc(pncols, "itemsets_project_matrix: pcolids"); + pmat->colptr = pcolptr = gk_zmalloc(pncols+1, "itemsets_project_matrix: pcolptr"); + pmat->colind = pcolind = gk_imalloc(pnnz, "itemsets_project_matrix: pcolind"); + + + /* Populate the projected matrix */ + pcolptr[0] = 0; + for (pnnz=0, ii=0; ii + + +/*************************************************************************/ +/*! This function creates an mcore + */ +/*************************************************************************/ +gk_mcore_t *gk_mcoreCreate(size_t coresize) +{ + gk_mcore_t *mcore; + + mcore = (gk_mcore_t *)gk_malloc(sizeof(gk_mcore_t), "gk_mcoreCreate: mcore"); + memset(mcore, 0, sizeof(gk_mcore_t)); + + mcore->coresize = coresize; + mcore->corecpos = 0; + + mcore->core = (coresize == 0 ? NULL : gk_malloc(mcore->coresize, "gk_mcoreCreate: core")); + + /* allocate the memory for keeping track of malloc ops */ + mcore->nmops = 2048; + mcore->cmop = 0; + mcore->mops = (gk_mop_t *)gk_malloc(mcore->nmops*sizeof(gk_mop_t), "gk_mcoreCreate: mcore->mops"); + + return mcore; +} + + +/*************************************************************************/ +/*! This function creates an mcore. This version is used for gkmcore. 
+ */ +/*************************************************************************/ +gk_mcore_t *gk_gkmcoreCreate() +{ + gk_mcore_t *mcore; + + if ((mcore = (gk_mcore_t *)malloc(sizeof(gk_mcore_t))) == NULL) + return NULL; + memset(mcore, 0, sizeof(gk_mcore_t)); + + /* allocate the memory for keeping track of malloc ops */ + mcore->nmops = 2048; + mcore->cmop = 0; + if ((mcore->mops = (gk_mop_t *)malloc(mcore->nmops*sizeof(gk_mop_t))) == NULL) { + free(mcore); + return NULL; + } + + return mcore; +} + + +/*************************************************************************/ +/*! This function destroys an mcore. + */ +/*************************************************************************/ +void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats) +{ + gk_mcore_t *mcore = *r_mcore; + + if (mcore == NULL) + return; + + if (showstats) + printf("\n gk_mcore statistics\n" + " coresize: %12zu nmops: %12zu cmop: %6zu\n" + " num_callocs: %12zu num_hallocs: %12zu\n" + " size_callocs: %12zu size_hallocs: %12zu\n" + " cur_callocs: %12zu cur_hallocs: %12zu\n" + " max_callocs: %12zu max_hallocs: %12zu\n", + mcore->coresize, mcore->nmops, mcore->cmop, + mcore->num_callocs, mcore->num_hallocs, + mcore->size_callocs, mcore->size_hallocs, + mcore->cur_callocs, mcore->cur_hallocs, + mcore->max_callocs, mcore->max_hallocs); + + if (mcore->cur_callocs != 0 || mcore->cur_hallocs != 0 || mcore->cmop != 0) { + printf("***Warning: mcore memory was not fully freed when destroyed.\n" + " cur_callocs: %6zu cur_hallocs: %6zu cmop: %6zu\n", + mcore->cur_callocs, mcore->cur_hallocs, mcore->cmop); + } + + gk_free((void **)&mcore->core, &mcore->mops, &mcore, LTERM); + + *r_mcore = NULL; +} + + +/*************************************************************************/ +/*! This function destroys an mcore. This version is for gkmcore. 
+ */ +/*************************************************************************/ +void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats) +{ + gk_mcore_t *mcore = *r_mcore; + + if (mcore == NULL) + return; + + if (showstats) + printf("\n gk_mcore statistics\n" + " nmops: %12zu cmop: %6zu\n" + " num_hallocs: %12zu\n" + " size_hallocs: %12zu\n" + " cur_hallocs: %12zu\n" + " max_hallocs: %12zu\n", + mcore->nmops, mcore->cmop, + mcore->num_hallocs, + mcore->size_hallocs, + mcore->cur_hallocs, + mcore->max_hallocs); + + if (mcore->cur_hallocs != 0 || mcore->cmop != 0) { + printf("***Warning: mcore memory was not fully freed when destroyed.\n" + " cur_hallocs: %6zu cmop: %6zu\n", + mcore->cur_hallocs, mcore->cmop); + } + + free(mcore->mops); + free(mcore); + + *r_mcore = NULL; +} + + +/*************************************************************************/ +/*! This function allocate space from the core/heap + */ +/*************************************************************************/ +void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes) +{ + void *ptr; + + /* pad to make pointers 8-byte aligned */ + nbytes += (nbytes%8 == 0 ? 0 : 8 - nbytes%8); + + if (mcore->corecpos + nbytes < mcore->coresize) { + /* service this request from the core */ + ptr = ((char *)mcore->core)+mcore->corecpos; + mcore->corecpos += nbytes; + + gk_mcoreAdd(mcore, GK_MOPT_CORE, nbytes, ptr); + } + else { + /* service this request from the heap */ + ptr = gk_malloc(nbytes, "gk_mcoremalloc: ptr"); + + gk_mcoreAdd(mcore, GK_MOPT_HEAP, nbytes, ptr); + } + + /* + printf("MCMALLOC: %zu %d %8zu\n", mcore->cmop-1, + mcore->mops[mcore->cmop-1].type, mcore->mops[mcore->cmop-1].nbytes); + */ + + return ptr; +} + + +/*************************************************************************/ +/*! 
This function sets a marker in the stack of malloc ops to be used + subsequently for freeing purposes + */ +/*************************************************************************/ +void gk_mcorePush(gk_mcore_t *mcore) +{ + gk_mcoreAdd(mcore, GK_MOPT_MARK, 0, NULL); + /* printf("MCPPUSH: %zu\n", mcore->cmop-1); */ +} + + +/*************************************************************************/ +/*! This function sets a marker in the stack of malloc ops to be used + subsequently for freeing purposes. This is the gkmcore version. + */ +/*************************************************************************/ +void gk_gkmcorePush(gk_mcore_t *mcore) +{ + gk_gkmcoreAdd(mcore, GK_MOPT_MARK, 0, NULL); + /* printf("MCPPUSH: %zu\n", mcore->cmop-1); */ +} + + +/*************************************************************************/ +/*! This function frees all mops since the last push + */ +/*************************************************************************/ +void gk_mcorePop(gk_mcore_t *mcore) +{ + while (mcore->cmop > 0) { + mcore->cmop--; + switch (mcore->mops[mcore->cmop].type) { + case GK_MOPT_MARK: /* push marker */ + goto DONE; + break; + + case GK_MOPT_CORE: /* core free */ + if (mcore->corecpos < mcore->mops[mcore->cmop].nbytes) + errexit("Internal Error: wspace's core is about to be over-freed [%zu, %zu, %zd]\n", + mcore->coresize, mcore->corecpos, mcore->mops[mcore->cmop].nbytes); + + mcore->corecpos -= mcore->mops[mcore->cmop].nbytes; + mcore->cur_callocs -= mcore->mops[mcore->cmop].nbytes; + break; + + case GK_MOPT_HEAP: /* heap free */ + gk_free((void **)&mcore->mops[mcore->cmop].ptr, LTERM); + mcore->cur_hallocs -= mcore->mops[mcore->cmop].nbytes; + break; + + default: + gk_errexit(SIGMEM, "Unknown mop type of %d\n", mcore->mops[mcore->cmop].type); + } + } + +DONE: + ; + /*printf("MCPPOP: %zu\n", mcore->cmop); */ +} + + +/*************************************************************************/ +/*! 
This function frees all mops since the last push. This version is + for poping the gkmcore and it uses free instead of gk_free. + */ +/*************************************************************************/ +void gk_gkmcorePop(gk_mcore_t *mcore) +{ + while (mcore->cmop > 0) { + mcore->cmop--; + switch (mcore->mops[mcore->cmop].type) { + case GK_MOPT_MARK: /* push marker */ + goto DONE; + break; + + case GK_MOPT_HEAP: /* heap free */ + free(mcore->mops[mcore->cmop].ptr); + mcore->cur_hallocs -= mcore->mops[mcore->cmop].nbytes; + break; + + default: + gk_errexit(SIGMEM, "Unknown mop type of %d\n", mcore->mops[mcore->cmop].type); + } + } + +DONE: + ; +} + + +/*************************************************************************/ +/*! Adds a memory allocation at the end of the list. + */ +/*************************************************************************/ +void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr) +{ + if (mcore->cmop == mcore->nmops) { + mcore->nmops *= 2; + mcore->mops = realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t)); + if (mcore->mops == NULL) + gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n"); + } + + mcore->mops[mcore->cmop].type = type; + mcore->mops[mcore->cmop].nbytes = nbytes; + mcore->mops[mcore->cmop].ptr = ptr; + mcore->cmop++; + + switch (type) { + case GK_MOPT_MARK: + break; + + case GK_MOPT_CORE: + mcore->num_callocs++; + mcore->size_callocs += nbytes; + mcore->cur_callocs += nbytes; + if (mcore->max_callocs < mcore->cur_callocs) + mcore->max_callocs = mcore->cur_callocs; + break; + + case GK_MOPT_HEAP: + mcore->num_hallocs++; + mcore->size_hallocs += nbytes; + mcore->cur_hallocs += nbytes; + if (mcore->max_hallocs < mcore->cur_hallocs) + mcore->max_hallocs = mcore->cur_hallocs; + break; + default: + gk_errexit(SIGMEM, "Incorrect mcore type operation.\n"); + } +} + + +/*************************************************************************/ +/*! 
Adds a memory allocation at the end of the list. This is the gkmcore + version. + */ +/*************************************************************************/ +void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr) +{ + if (mcore->cmop == mcore->nmops) { + mcore->nmops *= 2; + mcore->mops = realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t)); + if (mcore->mops == NULL) + gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n"); + } + + mcore->mops[mcore->cmop].type = type; + mcore->mops[mcore->cmop].nbytes = nbytes; + mcore->mops[mcore->cmop].ptr = ptr; + mcore->cmop++; + + switch (type) { + case GK_MOPT_MARK: + break; + + case GK_MOPT_HEAP: + mcore->num_hallocs++; + mcore->size_hallocs += nbytes; + mcore->cur_hallocs += nbytes; + if (mcore->max_hallocs < mcore->cur_hallocs) + mcore->max_hallocs = mcore->cur_hallocs; + break; + default: + gk_errexit(SIGMEM, "Incorrect mcore type operation.\n"); + } +} + + +/*************************************************************************/ +/*! This function deletes the mop associated with the supplied pointer. + The mop has to be a heap allocation, otherwise it fails violently. + */ +/*************************************************************************/ +void gk_mcoreDel(gk_mcore_t *mcore, void *ptr) +{ + int i; + + for (i=mcore->cmop-1; i>=0; i--) { + if (mcore->mops[i].type == GK_MOPT_MARK) + gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr); + + if (mcore->mops[i].ptr == ptr) { + if (mcore->mops[i].type != GK_MOPT_HEAP) + gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n"); + + mcore->cur_hallocs -= mcore->mops[i].nbytes; + mcore->mops[i] = mcore->mops[--mcore->cmop]; + return; + } + } + + gk_errexit(SIGMEM, "mcoreDel should never have been here!\n"); +} + + +/*************************************************************************/ +/*! This function deletes the mop associated with the supplied pointer. 
+ The mop has to be a heap allocation, otherwise it fails violently. + This is the gkmcore version. + */ +/*************************************************************************/ +void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr) +{ + int i; + + for (i=mcore->cmop-1; i>=0; i--) { + if (mcore->mops[i].type == GK_MOPT_MARK) + gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr); + + if (mcore->mops[i].ptr == ptr) { + if (mcore->mops[i].type != GK_MOPT_HEAP) + gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n"); + + mcore->cur_hallocs -= mcore->mops[i].nbytes; + mcore->mops[i] = mcore->mops[--mcore->cmop]; + return; + } + } + + gk_errexit(SIGMEM, "gkmcoreDel should never have been here!\n"); +} + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/memory.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/memory.c new file mode 100644 index 00000000..e6dc99c4 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/memory.c @@ -0,0 +1,307 @@ +/*! +\file memory.c +\brief This file contains various allocation routines + +The allocation routines included are for 1D and 2D arrays of the +most datatypes that GKlib support. Many of these routines are +defined with the help of the macros in gk_memory.h. These macros +can be used to define other memory allocation routines. + +\date Started 4/3/2007 +\author George +\version\verbatim $Id: memory.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim +*/ + + +#include <GKlib.h> + +/* This is for the global mcore that tracks all heap allocations */ +static __thread gk_mcore_t *gkmcore = NULL; + + +/*************************************************************************/ +/*! 
Define the set of memory allocation routines for each data type */ +/**************************************************************************/ +GK_MKALLOC(gk_c, char) +GK_MKALLOC(gk_i, int) +GK_MKALLOC(gk_i8, int8_t) +GK_MKALLOC(gk_i16, int16_t) +GK_MKALLOC(gk_i32, int32_t) +GK_MKALLOC(gk_i64, int64_t) +GK_MKALLOC(gk_ui8, uint8_t) +GK_MKALLOC(gk_ui16, uint16_t) +GK_MKALLOC(gk_ui32, uint32_t) +GK_MKALLOC(gk_ui64, uint64_t) +GK_MKALLOC(gk_z, ssize_t) +GK_MKALLOC(gk_zu, size_t) +GK_MKALLOC(gk_f, float) +GK_MKALLOC(gk_d, double) +GK_MKALLOC(gk_idx, gk_idx_t) + +GK_MKALLOC(gk_ckv, gk_ckv_t) +GK_MKALLOC(gk_ikv, gk_ikv_t) +GK_MKALLOC(gk_i8kv, gk_i8kv_t) +GK_MKALLOC(gk_i16kv, gk_i16kv_t) +GK_MKALLOC(gk_i32kv, gk_i32kv_t) +GK_MKALLOC(gk_i64kv, gk_i64kv_t) +GK_MKALLOC(gk_zkv, gk_zkv_t) +GK_MKALLOC(gk_zukv, gk_zukv_t) +GK_MKALLOC(gk_fkv, gk_fkv_t) +GK_MKALLOC(gk_dkv, gk_dkv_t) +GK_MKALLOC(gk_skv, gk_skv_t) +GK_MKALLOC(gk_idxkv, gk_idxkv_t) + + + + + + +/*************************************************************************/ +/*! This function allocates a two-dimensional matrix. + */ +/*************************************************************************/ +void gk_AllocMatrix(void ***r_matrix, size_t elmlen, size_t ndim1, size_t ndim2) +{ + size_t i, j; + void **matrix; + + *r_matrix = NULL; + + if ((matrix = (void **)gk_malloc(ndim1*sizeof(void *), "gk_AllocMatrix: matrix")) == NULL) + return; + + for (i=0; icmop == 0) { + gk_gkmcoreDestroy(&gkmcore, showstats); + gkmcore = NULL; + } + } +} + + +/*************************************************************************/ +/*! This function is my wrapper around malloc that provides the following + enhancements over malloc: + * It always allocates one byte of memory, even if 0 bytes are requested. + This is to ensure that checks of returned values do not lead to NULL + due to 0 bytes requested. + * It zeros-out the memory that is allocated. This is for a quick init + of the underlying datastructures. 
+*/ +/**************************************************************************/ +void *gk_malloc(size_t nbytes, char *msg) +{ + void *ptr=NULL; + + if (nbytes == 0) + nbytes++; /* Force mallocs to actually allocate some memory */ + + ptr = (void *)malloc(nbytes); + + if (ptr == NULL) { + fprintf(stderr, " Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed()); + fprintf(stderr, " Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed()); + gk_errexit(SIGMEM, "***Memory allocation failed for %s. Requested size: %zu bytes", + msg, nbytes); + return NULL; + } + + /* add this memory allocation */ + if (gkmcore != NULL) gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, ptr); + + return ptr; +} + + +/************************************************************************* +* This function is my wrapper around realloc +**************************************************************************/ +void *gk_realloc(void *oldptr, size_t nbytes, char *msg) +{ + void *ptr=NULL; + + if (nbytes == 0) + nbytes++; /* Force mallocs to actually allocate some memory */ + + /* remove this memory de-allocation */ + if (gkmcore != NULL && oldptr != NULL) gk_gkmcoreDel(gkmcore, oldptr); + + ptr = (void *)realloc(oldptr, nbytes); + + if (ptr == NULL) { + fprintf(stderr, " Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed()); + fprintf(stderr, " Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed()); + gk_errexit(SIGMEM, "***Memory realloc failed for %s. " "Requested size: %zu bytes", + msg, nbytes); + return NULL; + } + + /* add this memory allocation */ + if (gkmcore != NULL) gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, ptr); + + return ptr; +} + + +/************************************************************************* +* This function is my wrapper around free, allows multiple pointers +**************************************************************************/ +void gk_free(void **ptr1,...) 
+{ + va_list plist; + void **ptr; + + if (*ptr1 != NULL) { + free(*ptr1); + + /* remove this memory de-allocation */ + if (gkmcore != NULL) + gk_gkmcoreDel(gkmcore, *ptr1); + } + *ptr1 = NULL; + + va_start(plist, ptr1); + while ((ptr = va_arg(plist, void **)) != LTERM) { + if (*ptr != NULL) { + free(*ptr); + + /* remove this memory de-allocation */ + if (gkmcore != NULL) + gk_gkmcoreDel(gkmcore, *ptr); + } + *ptr = NULL; + } + va_end(plist); +} + + +/************************************************************************* +* This function returns the current ammount of dynamically allocated +* memory that is used by the system +**************************************************************************/ +size_t gk_GetCurMemoryUsed() +{ + if (gkmcore == NULL) + return 0; + else + return gkmcore->cur_hallocs; +} + + +/************************************************************************* +* This function returns the maximum ammount of dynamically allocated +* memory that was used by the system +**************************************************************************/ +size_t gk_GetMaxMemoryUsed() +{ + if (gkmcore == NULL) + return 0; + else + return gkmcore->max_hallocs; +} + + +/*************************************************************************/ +/*! This function returns the VmSize and VmRSS of the calling process. */ +/*************************************************************************/ +void gk_GetVMInfo(size_t *vmsize, size_t *vmrss) +{ + FILE *fp; + char fname[1024]; + + sprintf(fname, "/proc/%d/statm", getpid()); + fp = gk_fopen(fname, "r", "proc/pid/statm"); + if (fscanf(fp, "%zu %zu", vmsize, vmrss) != 2) + errexit("Failed to read to values from %s\n", fname); + gk_fclose(fp); + + /* + *vmsize *= sysconf(_SC_PAGESIZE); + *vmrss *= sysconf(_SC_PAGESIZE); + */ + + return; +} + + +/*************************************************************************/ +/*! 
This function returns the peak virtual memory of the calling process + by reading the VmPeak field in /proc/self/status . */ +/*************************************************************************/ +size_t gk_GetProcVmPeak() +{ + FILE *fp; + char line[128]; + size_t vmpeak=0; + + if (gk_fexists("/proc/self/status")) { + fp = gk_fopen("/proc/self/status", "r", "proc/self/status"); + while (fgets(line, 128, fp) != NULL) { + if (strncmp(line, "VmPeak:", 7) == 0) { + vmpeak = atoll(line+8)*1024; + break; + } + } + gk_fclose(fp); + } + + return vmpeak; +} diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/pqueue.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/pqueue.c new file mode 100644 index 00000000..2fb8515d --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/pqueue.c @@ -0,0 +1,25 @@ +/*! +\file pqueue.c +\brief This file implements various max-priority queues. + +The priority queues are generated using the GK_MKPQUEUE macro. + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: pqueue.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#include + + +/*************************************************************************/ +/*! 
Create the various max priority queues */ +/*************************************************************************/ +#define key_gt(a, b) ((a) > (b)) +GK_MKPQUEUE(gk_ipq, gk_ipq_t, gk_ikv_t, int, gk_idx_t, gk_ikvmalloc, INT_MAX, key_gt) +GK_MKPQUEUE(gk_i32pq, gk_i32pq_t, gk_i32kv_t, int32_t, gk_idx_t, gk_i32kvmalloc, INT32_MAX, key_gt) +GK_MKPQUEUE(gk_i64pq, gk_i64pq_t, gk_i64kv_t, int64_t, gk_idx_t, gk_i64kvmalloc, INT64_MAX, key_gt) +GK_MKPQUEUE(gk_fpq, gk_fpq_t, gk_fkv_t, float, gk_idx_t, gk_fkvmalloc, FLT_MAX, key_gt) +GK_MKPQUEUE(gk_dpq, gk_dpq_t, gk_dkv_t, double, gk_idx_t, gk_dkvmalloc, DBL_MAX, key_gt) +GK_MKPQUEUE(gk_idxpq, gk_idxpq_t, gk_idxkv_t, gk_idx_t, gk_idx_t, gk_idxkvmalloc, GK_IDX_MAX, key_gt) +#undef key_gt diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/random.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/random.c new file mode 100644 index 00000000..36986146 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/random.c @@ -0,0 +1,136 @@ +/*! +\file +\brief Various routines for providing portable 32 and 64 bit random number + generators. + +\date Started 5/17/2007 +\author George +\version\verbatim $Id: random.c 18796 2015-06-02 11:39:45Z karypis $ \endverbatim +*/ + +#include + + +/*************************************************************************/ +/*! Create the various random number functions */ +/*************************************************************************/ +GK_MKRANDOM(gk_c, size_t, char) +GK_MKRANDOM(gk_i, size_t, int) +GK_MKRANDOM(gk_i32, size_t, int32_t) +GK_MKRANDOM(gk_f, size_t, float) +GK_MKRANDOM(gk_d, size_t, double) +GK_MKRANDOM(gk_idx, size_t, gk_idx_t) +GK_MKRANDOM(gk_z, size_t, ssize_t) +GK_MKRANDOM(gk_zu, size_t, size_t) + + + +/*************************************************************************/ +/*! 
GKlib's built in random number generator for portability across + different architectures */ +/*************************************************************************/ +#ifdef USE_GKRAND +/* + A C-program for MT19937-64 (2004/9/29 version). + Coded by Takuji Nishimura and Makoto Matsumoto. + + This is a 64-bit version of Mersenne Twister pseudorandom number + generator. + + Before using, initialize the state by using init_genrand64(seed) + or init_by_array64(init_key, key_length). + + Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#define NN 312 +#define MM 156 +#define MATRIX_A 0xB5026F5AA96619E9ULL +#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */ +#define LM 0x7FFFFFFFULL /* Least significant 31 bits */ + + +/* The array for the state vector */ +static uint64_t mt[NN]; +/* mti==NN+1 means mt[NN] is not initialized */ +static int mti=NN+1; +#endif /* USE_GKRAND */ + +/* initializes mt[NN] with a seed */ +void gk_randinit(uint64_t seed) +{ +#ifdef USE_GKRAND + mt[0] = seed; + for (mti=1; mti> 62)) + mti); +#else + srand((unsigned int) seed); +#endif +} + + +/* generates a random number on [0, 2^64-1]-interval */ +uint64_t gk_randint64(void) +{ +#ifdef USE_GKRAND + int i; + unsigned long long x; + static uint64_t mag01[2]={0ULL, MATRIX_A}; + + if (mti >= NN) { /* generate NN words at one time */ + /* if init_genrand64() has not been called, */ + /* a default initial seed is used */ + if (mti == NN+1) + gk_randinit(5489ULL); + + for (i=0; i>1) ^ mag01[(int)(x&1ULL)]; + } + for (; i>1) ^ mag01[(int)(x&1ULL)]; + } + x = (mt[NN-1]&UM)|(mt[0]&LM); + mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)]; + + mti = 0; + } + + x = mt[mti++]; + + x ^= (x >> 29) & 0x5555555555555555ULL; + x ^= (x << 17) & 0x71D67FFFEDA60000ULL; + x ^= (x << 37) & 0xFFF7EEE000000000ULL; + x ^= (x >> 43); + + return x & 0x7FFFFFFFFFFFFFFF; +#else + return (uint64_t)(((uint64_t) rand()) << 32 | ((uint64_t) rand())); +#endif +} + +/* generates a random number on [0, 2^32-1]-interval */ +uint32_t gk_randint32(void) +{ +#ifdef USE_GKRAND + return (uint32_t)(gk_randint64() & 0x7FFFFFFF); +#else + return (uint32_t)rand(); +#endif +} + + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/rw.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/rw.c new file mode 100644 index 00000000..7cd4391a --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/rw.c @@ -0,0 +1,103 @@ +/*! 
+ * \file + * + * \brief Various routines that perform random-walk based operations + on graphs stored as gk_csr_t matrices. + * + * \author George Karypis + * \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim + */ + +#include + + +/*************************************************************************/ +/*! Computes the (personalized) page-rank of the vertices in a graph. + + \param mat is the matrix storing the graph. + \param lamda is the restart probability. + \param eps is the error tolerance for convergance. + \param max_niter is the maximum number of allowed iterations. + \param pr on entry stores the restart distribution of the vertices. + This allows for the computation of personalized page-rank scores + by appropriately setting that parameter. + On return, pr stores the computed page ranks. + + \returns the number of iterations that were performed. +*/ +/**************************************************************************/ +int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr) +{ + ssize_t i, j, k, iter, nrows; + double *rscale, *prold, *prnew, *prtmp; + double fromsinks, error; + ssize_t *rowptr; + int *rowind; + float *rowval; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + prold = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew"); + prnew = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold"); + rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale"); + + /* compute the scaling factors to get adjacency weights into transition + probabilities */ + for (i=0; i 0) + rscale[i] = 1.0/rscale[i]; + } + + /* the restart distribution is the initial pr scores */ + for (i=0; i error ? 
fabs(prnew[i]-prold[i]) : error); + + //printf("nrm1: %le maxfabserr: %le\n", gk_dsum(nrows, prnew, 1), error); + + if (error < eps) + break; + } + + /* store the computed pr scores into pr for output */ + for (i=0; i + + + + +/*********************************************************/ +/* ! \brief Initializes the gk_seq_t variable + + + + +\param A pointer to gk_seq_t itself +\returns null +*/ +/***********************************************************************/ + +void gk_seq_init(gk_seq_t *seq) +{ + + seq->len = 0; + seq->sequence = NULL; + + seq->pssm = NULL; + seq->psfm = NULL; + + seq->name = NULL; + +} + +/***********************************************************************/ +/*! \brief This function creates the localizations for the various sequences + +\param string i.e amino acids, nucleotides, sequences +\returns gk_i2cc2i_t variable +*/ +/*********************************************************************/ + +gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet) +{ + + + int nsymbols; + gk_idx_t i; + gk_i2cc2i_t *t; + + nsymbols = strlen(alphabet); + t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common"); + t->n = nsymbols; + t->i2c = gk_cmalloc(256, "gk_i2c_create_common"); + t->c2i = gk_imalloc(256, "gk_i2c_create_common"); + + + gk_cset(256, -1, t->i2c); + gk_iset(256, -1, t->c2i); + + for(i=0;ii2c[i] = alphabet[i]; + t->c2i[(int)alphabet[i]] = i; + } + + return t; + +} + + +/*********************************************************************/ +/*! 
\brief This function reads a pssm in the format of gkmod pssm + +\param file_name is the name of the pssm file +\returns gk_seq_t +*/ +/********************************************************************/ +gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename) +{ + gk_seq_t *seq; + gk_idx_t i, j, ii; + size_t ntokens, nbytes, len; + FILE *fpin; + + + gk_Tokens_t tokens; + static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*"; + static int PSSMWIDTH = 20; + char *header, line[MAXLINELEN]; + gk_i2cc2i_t *converter; + + header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header"); + + converter = gk_i2cc2i_create_common(AAORDER); + + gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes); + len --; + + seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM"); + gk_seq_init(seq); + + seq->len = len; + seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM"); + seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); + seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); + + seq->nsymbols = PSSMWIDTH; + seq->name = gk_getbasename(filename); + + fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM"); + + + /* Read the header line */ + if (fgets(line, MAXLINELEN-1, fpin) == NULL) + errexit("Unexpected end of file: %s\n", filename); + gk_strtoupper(line); + gk_strtokenize(line, " \t\n", &tokens); + + for (i=0; isequence[i] = converter->c2i[(int)tokens.list[1][0]]; + + for (j=0; jpssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]); + seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]); + } + + + + gk_freetokenslist(&tokens); + i++; + } + + seq->len = i; /* Reset the length if certain characters were skipped */ + + gk_free((void **)&header, LTERM); + gk_fclose(fpin); + + return seq; +} + + +/**************************************************************************/ +/*! \brief This function frees the memory allocated to the seq structure. 
+ +\param gk_seq_t +\returns nothing +*/ +/**************************************************************************/ +void gk_seq_free(gk_seq_t *seq) +{ + gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols); + gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols); + gk_free((void **)&seq->name, &seq->sequence, LTERM); + //gk_free((void **)&seq, LTERM); + gk_free((void **) &seq, LTERM); + +} diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/sort.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/sort.c new file mode 100644 index 00000000..f0144aea --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/sort.c @@ -0,0 +1,437 @@ +/*! +\file sort.c +\brief This file contains GKlib's various sorting routines + +These routines are implemented using the GKSORT macro that is defined +in gk_qsort.h and is based on GNU's GLIBC qsort() implementation. + +Additional sorting routines can be created using the same way that +these routines where defined. + +\date Started 4/4/07 +\author George +\version\verbatim $Id: sort.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim +*/ + +#include + + + +/*************************************************************************/ +/*! Sorts an array of chars in increasing order */ +/*************************************************************************/ +void gk_csorti(size_t n, char *base) +{ +#define char_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(char, base, n, char_lt); +#undef char_lt +} + + +/*************************************************************************/ +/*! Sorts an array of chars in decreasing order */ +/*************************************************************************/ +void gk_csortd(size_t n, char *base) +{ +#define char_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(char, base, n, char_gt); +#undef char_gt +} + + +/*************************************************************************/ +/*! 
Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_isorti(size_t n, int *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(int, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_isortd(size_t n, int *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(int, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_i32sorti(size_t n, int32_t *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(int32_t, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_i32sortd(size_t n, int32_t *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(int32_t, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_i64sorti(size_t n, int64_t *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(int64_t, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! 
Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_ui32sorti(size_t n, uint32_t *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(uint32_t, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_ui32sortd(size_t n, uint32_t *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(uint32_t, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_ui64sorti(size_t n, uint64_t *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(uint64_t, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_ui64sortd(size_t n, uint64_t *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(uint64_t, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_i64sortd(size_t n, int64_t *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(int64_t, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! 
Sorts an array of floats in increasing order */ +/*************************************************************************/ +void gk_fsorti(size_t n, float *base) +{ +#define float_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(float, base, n, float_lt); +#undef float_lt +} + + +/*************************************************************************/ +/*! Sorts an array of floats in decreasing order */ +/*************************************************************************/ +void gk_fsortd(size_t n, float *base) +{ +#define float_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(float, base, n, float_gt); +#undef float_gt +} + + +/*************************************************************************/ +/*! Sorts an array of doubles in increasing order */ +/*************************************************************************/ +void gk_dsorti(size_t n, double *base) +{ +#define double_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(double, base, n, double_lt); +#undef double_lt +} + + +/*************************************************************************/ +/*! Sorts an array of doubles in decreasing order */ +/*************************************************************************/ +void gk_dsortd(size_t n, double *base) +{ +#define double_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(double, base, n, double_gt); +#undef double_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_idx_t in increasing order */ +/*************************************************************************/ +void gk_idxsorti(size_t n, gk_idx_t *base) +{ +#define idx_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(gk_idx_t, base, n, idx_lt); +#undef idx_lt +} + + +/*************************************************************************/ +/*! 
Sorts an array of gk_idx_t in decreasing order */ +/*************************************************************************/ +void gk_idxsortd(size_t n, gk_idx_t *base) +{ +#define idx_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(gk_idx_t, base, n, idx_gt); +#undef idx_gt +} + + + + +/*************************************************************************/ +/*! Sorts an array of gk_ckv_t in increasing order */ +/*************************************************************************/ +void gk_ckvsorti(size_t n, gk_ckv_t *base) +{ +#define ckey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_ckv_t, base, n, ckey_lt); +#undef ckey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_ckv_t in decreasing order */ +/*************************************************************************/ +void gk_ckvsortd(size_t n, gk_ckv_t *base) +{ +#define ckey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_ckv_t, base, n, ckey_gt); +#undef ckey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_ikv_t in increasing order */ +/*************************************************************************/ +void gk_ikvsorti(size_t n, gk_ikv_t *base) +{ +#define ikey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_ikv_t, base, n, ikey_lt); +#undef ikey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_ikv_t in decreasing order */ +/*************************************************************************/ +void gk_ikvsortd(size_t n, gk_ikv_t *base) +{ +#define ikey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_ikv_t, base, n, ikey_gt); +#undef ikey_gt +} + + +/*************************************************************************/ +/*! 
Sorts an array of gk_i32kv_t in increasing order */ +/*************************************************************************/ +void gk_i32kvsorti(size_t n, gk_i32kv_t *base) +{ +#define ikey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_i32kv_t, base, n, ikey_lt); +#undef ikey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_i32kv_t in decreasing order */ +/*************************************************************************/ +void gk_i32kvsortd(size_t n, gk_i32kv_t *base) +{ +#define ikey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_i32kv_t, base, n, ikey_gt); +#undef ikey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_i64kv_t in increasing order */ +/*************************************************************************/ +void gk_i64kvsorti(size_t n, gk_i64kv_t *base) +{ +#define ikey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_i64kv_t, base, n, ikey_lt); +#undef ikey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_i64kv_t in decreasing order */ +/*************************************************************************/ +void gk_i64kvsortd(size_t n, gk_i64kv_t *base) +{ +#define ikey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_i64kv_t, base, n, ikey_gt); +#undef ikey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_zkv_t in increasing order */ +/*************************************************************************/ +void gk_zkvsorti(size_t n, gk_zkv_t *base) +{ +#define zkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_zkv_t, base, n, zkey_lt); +#undef zkey_lt +} + + +/*************************************************************************/ +/*! 
Sorts an array of gk_zkv_t in decreasing order */ +/*************************************************************************/ +void gk_zkvsortd(size_t n, gk_zkv_t *base) +{ +#define zkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_zkv_t, base, n, zkey_gt); +#undef zkey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_zukv_t in increasing order */ +/*************************************************************************/ +void gk_zukvsorti(size_t n, gk_zukv_t *base) +{ +#define zukey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_zukv_t, base, n, zukey_lt); +#undef zukey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_zukv_t in decreasing order */ +/*************************************************************************/ +void gk_zukvsortd(size_t n, gk_zukv_t *base) +{ +#define zukey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_zukv_t, base, n, zukey_gt); +#undef zukey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_fkv_t in increasing order */ +/*************************************************************************/ +void gk_fkvsorti(size_t n, gk_fkv_t *base) +{ +#define fkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_fkv_t, base, n, fkey_lt); +#undef fkey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_fkv_t in decreasing order */ +/*************************************************************************/ +void gk_fkvsortd(size_t n, gk_fkv_t *base) +{ +#define fkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_fkv_t, base, n, fkey_gt); +#undef fkey_gt +} + + +/*************************************************************************/ +/*! 
Sorts an array of gk_dkv_t in increasing order */ +/*************************************************************************/ +void gk_dkvsorti(size_t n, gk_dkv_t *base) +{ +#define dkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_dkv_t, base, n, dkey_lt); +#undef dkey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_fkv_t in decreasing order */ +/*************************************************************************/ +void gk_dkvsortd(size_t n, gk_dkv_t *base) +{ +#define dkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_dkv_t, base, n, dkey_gt); +#undef dkey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_skv_t in increasing order */ +/*************************************************************************/ +void gk_skvsorti(size_t n, gk_skv_t *base) +{ +#define skey_lt(a, b) (strcmp((a)->key, (b)->key) < 0) + GK_MKQSORT(gk_skv_t, base, n, skey_lt); +#undef skey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_skv_t in decreasing order */ +/*************************************************************************/ +void gk_skvsortd(size_t n, gk_skv_t *base) +{ +#define skey_gt(a, b) (strcmp((a)->key, (b)->key) > 0) + GK_MKQSORT(gk_skv_t, base, n, skey_gt); +#undef skey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_idxkv_t in increasing order */ +/*************************************************************************/ +void gk_idxkvsorti(size_t n, gk_idxkv_t *base) +{ +#define idxkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_idxkv_t, base, n, idxkey_lt); +#undef idxkey_lt +} + + +/*************************************************************************/ +/*! 
Sorts an array of gk_idxkv_t in decreasing order */ +/*************************************************************************/ +void gk_idxkvsortd(size_t n, gk_idxkv_t *base) +{ +#define idxkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_idxkv_t, base, n, idxkey_gt); +#undef idxkey_gt +} diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/string.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/string.c new file mode 100644 index 00000000..4a3fb140 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/string.c @@ -0,0 +1,530 @@ +/************************************************************************/ +/*! \file + +\brief Functions for manipulating strings. + +Various functions for manipulating strings. Some of these functions +provide new functionality, whereas others are drop-in replacements +of standard functions (but with enhanced functionality). + +\date Started 11/1/99 +\author George +\version $Id: string.c 14330 2013-05-18 12:15:15Z karypis $ +*/ +/************************************************************************/ + +/* the following is for strptime() */ +#define _XOPEN_SOURCE +#include +#undef _XOPEN_SOURCE + +#include + + + +/************************************************************************/ +/*! \brief Replaces certain characters in a string. + +This function takes a string and replaces all the characters in the +\c fromlist with the corresponding characters from the \c tolist. +That is, each occurence of fromlist[i] is replaced by +tolist[i]. +If the \c tolist is shorter than \c fromlist, then the corresponding +characters are deleted. The modifications on \c str are done in place. +It tries to provide a functionality similar to Perl's \b tr// function. + +\param str is the string whose characters will be replaced. +\param fromlist is the set of characters to be replaced. +\param tolist is the set of replacement characters . +\returns A pointer to \c str itself. 
+*/ +/************************************************************************/ +char *gk_strchr_replace(char *str, char *fromlist, char *tolist) +{ + ssize_t i, j, k, len, fromlen, tolen; + + len = strlen(str); + fromlen = strlen(fromlist); + tolen = strlen(tolist); + + for (i=j=0; i s// regular-expression +based substitution function. + +\param str + is the input string on which the operation will be performed. +\param pattern + is the regular expression for the pattern to be matched for substitution. +\param replacement + is the replacement string, in which the possible captured pattern substrings + are referred to as $1, $2, ..., $9. The entire matched pattern is refered + to as $0. +\param options + is a string specified options for the substitution operation. Currently the + "i" (case insensitive) and "g" (global substitution) are + supported. +\param new_str + is a reference to a pointer that will store a pointer to the newly created + string that results from the substitutions. This string is allocated via + gk_malloc() and needs to be freed using gk_free(). The string is returned + even if no substitutions were performed. +\returns + If successful, it returns 1 + the number of substitutions that were performed. + Thus, if no substitutions were performed, the returned value will be 1. + Otherwise it returns 0. In case of error, a meaningful error message is + returned in newstr, which also needs to be freed afterwards. +*/ +/************************************************************************/ +int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options, + char **new_str) +{ + ssize_t i, len, rlen, nlen, offset, noffset; + int j, rc, flags, global, nmatches; + regex_t re; + regmatch_t matches[10]; + + + /* Parse the options */ + flags = REG_EXTENDED; + if (strchr(options, 'i') != NULL) + flags = flags | REG_ICASE; + global = (strchr(options, 'g') != NULL ? 
1 : 0); + + + /* Compile the regex */ + if ((rc = regcomp(&re, pattern, flags)) != 0) { + len = regerror(rc, &re, NULL, 0); + *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str"); + regerror(rc, &re, *new_str, len); + return 0; + } + + /* Prepare the output string */ + len = strlen(str); + nlen = 2*len; + noffset = 0; + *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str"); + + + /* Get into the matching-replacing loop */ + rlen = strlen(replacement); + offset = 0; + nmatches = 0; + do { + rc = regexec(&re, str+offset, 10, matches, 0); + + if (rc == REG_ESPACE) { + gk_free((void **)new_str, LTERM); + *new_str = gk_strdup("regexec ran out of memory."); + regfree(&re); + return 0; + } + else if (rc == REG_NOMATCH) { + if (nlen-noffset < len-offset) { + nlen += (len-offset) - (nlen-noffset); + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + strcpy(*new_str+noffset, str+offset); + noffset += (len-offset); + break; + } + else { /* A match was found! 
*/ + nmatches++; + + /* Copy the left unmatched portion of the string */ + if (matches[0].rm_so > 0) { + if (nlen-noffset < matches[0].rm_so) { + nlen += matches[0].rm_so - (nlen-noffset); + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + strncpy(*new_str+noffset, str+offset, matches[0].rm_so); + noffset += matches[0].rm_so; + } + + /* Go and append the replacement string */ + for (i=0; i 9) { + gk_free((void **)new_str, LTERM); + *new_str = gk_strdup("Error in captured subexpression specification."); + regfree(&re); + return 0; + } + + if (nlen-noffset < matches[j].rm_eo-matches[j].rm_so) { + nlen += nlen + (matches[j].rm_eo-matches[j].rm_so); + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + + strncpy(*new_str+noffset, str+offset+matches[j].rm_so, matches[j].rm_eo); + noffset += matches[j].rm_eo-matches[j].rm_so; + } + else { + gk_free((void **)new_str, LTERM); + *new_str = gk_strdup("Error in replacement string. Missing subexpression number folloing '$'."); + regfree(&re); + return 0; + } + break; + + default: + if (nlen-noffset < 1) { + nlen += nlen + 1; + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + (*new_str)[noffset++] = replacement[i]; + } + } + + /* Update the offset of str for the next match */ + offset += matches[0].rm_eo; + + if (!global) { + /* Copy the right portion of the string if no 'g' option */ + if (nlen-noffset < len-offset) { + nlen += (len-offset) - (nlen-noffset); + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + strcpy(*new_str+noffset, str+offset); + noffset += (len-offset); + } + } + } while (global); + + (*new_str)[noffset] = '\0'; + + regfree(&re); + return nmatches + 1; + +} + + + +/************************************************************************/ +/*! \brief Prunes characters from the end of the string. 
+ +This function removes any trailing characters that are included in the +\c rmlist. The trimming stops at the last character (i.e., first character +from the end) that is not in \c rmlist. +This function can be used to removed trailing spaces, newlines, etc. +This is a distructive operation as it modifies the string. + +\param str is the string that will be trimmed. +\param rmlist contains the set of characters that will be removed. +\returns A pointer to \c str itself. +\sa gk_strhprune() +*/ +/*************************************************************************/ +char *gk_strtprune(char *str, char *rmlist) +{ + ssize_t i, j, len; + + len = strlen(rmlist); + + for (i=strlen(str)-1; i>=0; i--) { + for (j=0; j0) { /* If something needs to be removed */ + for (j=0; str[i]; i++, j++) + str[j] = str[i]; + str[j] = '\0'; + } + + return str; +} + + +/************************************************************************/ +/*! \brief Converts a string to upper case. + +This function converts a string to upper case. This operation modifies the +string itself. + +\param str is the string whose case will be changed. +\returns A pointer to \c str itself. +\sa gk_strtolower() +*/ +/*************************************************************************/ +char *gk_strtoupper(char *str) +{ + int i; + + for (i=0; str[i]!='\0'; str[i]=toupper(str[i]), i++); + return str; +} + + +/************************************************************************/ +/*! \brief Converts a string to lower case. + +This function converts a string to lower case. This operation modifies the +string itself. + +\param str is the string whose case will be changed. +\returns A pointer to \c str itself. 
+\sa gk_strtoupper() +*/ +/*************************************************************************/ +char *gk_strtolower(char *str) +{ + int i; + + for (i=0; str[i]!='\0'; str[i]=tolower(str[i]), i++); + return str; +} + + +/************************************************************************/ +/*! \brief Duplicates a string + +This function is a replacement for C's standard strdup() function. +The key differences between the two are that gk_strdup(): + - uses the dynamic memory allocation routines of \e GKlib. + - it correctly handles NULL input strings. + +The string that is returned must be freed by gk_free(). + +\param orgstr is the string that will be duplicated. +\returns A pointer to the newly created string. +\sa gk_free() +*/ +/*************************************************************************/ +char *gk_strdup(char *orgstr) +{ + int len; + char *str=NULL; + + if (orgstr != NULL) { + len = strlen(orgstr)+1; + str = gk_malloc(len*sizeof(char), "gk_strdup: str"); + strcpy(str, orgstr); + } + + return str; +} + + +/************************************************************************/ +/*! \brief Case insensitive string comparison. + +This function compares two strings for equality by ignoring the case of the +strings. + +\warning This function is \b not equivalent to a case-insensitive + strcmp() function, as it does not return ordering + information. + +\todo Remove the above warning. + +\param s1 is the first string to be compared. +\param s2 is the second string to be compared. +\retval 1 if the strings are identical, +\retval 0 otherwise. +*/ +/*************************************************************************/ +int gk_strcasecmp(char *s1, char *s2) +{ + int i=0; + + if (strlen(s1) != strlen(s2)) + return 0; + + while (s1[i] != '\0') { + if (tolower(s1[i]) != tolower(s2[i])) + return 0; + i++; + } + + return 1; +} + + +/************************************************************************/ +/*! 
\brief Compare two strings in reverse order + +This function is similar to strcmp but it performs the comparison as +if the two strings were reversed. + +\param s1 is the first string to be compared. +\param s2 is the second string to be compared. +\retval negative, 0, or positive, if s1 < s2, s1 == s2, or s1 > s2 (not only -1, 0, 1). +*/ +/*************************************************************************/ +int gk_strrcmp(char *s1, char *s2) +{ + int i1 = strlen(s1)-1; + int i2 = strlen(s2)-1; + + while ((i1 >= 0) && (i2 >= 0)) { + if (s1[i1] != s2[i2]) + return (s1[i1] - s2[i2]); + i1--; + i2--; + } + + /* i1 == -1 and/or i2 == -1 */ + + if (i1 < i2) + return -1; + if (i1 > i2) + return 1; + return 0; +} + + + +/************************************************************************/ +/*! \brief Converts a time_t time into a string + +This function takes a time_t-specified time and returns a string-formatted +representation of the corresponding time. The format of the string is +mm/dd/yyyy hh:mm:ss, in which the hours are in military time. + +\param time is the time to be converted. +\return It returns a pointer to a statically allocated string that is + over-written in successive calls of this function. If the + conversion failed, it returns NULL. + +*/ +/*************************************************************************/ +char *gk_time2str(time_t time) +{ + static char datestr[128]; + struct tm *tm; + + tm = localtime(&time); + + if (strftime(datestr, 128, "%m/%d/%Y %H:%M:%S", tm) == 0) + return NULL; + else + return datestr; +} + + + +#if !defined(WIN32) && !defined(__MINGW32__) +/************************************************************************/ +/*! \brief Converts a date/time string into its equivalent time_t value + +This function takes date and/or time specification and converts it in +the equivalent time_t representation. The conversion is done using the +strptime() function. 
The format that gk_str2time() understands is +mm/dd/yyyy hh:mm:ss, in which the hours are in military time. + +\param str is the date/time string to be converted. +\return If the conversion was successful it returns the time, otherwise + it returns -1. +*/ +/*************************************************************************/ +time_t gk_str2time(char *str) +{ + struct tm time; + time_t rtime; + + memset(&time, '\0', sizeof(time)); + + if (strptime(str, "%m/%d/%Y %H:%M:%S", &time) == NULL) + return -1; + + rtime = mktime(&time); + return (rtime < 0 ? 0 : rtime); +} +#endif + + +/************************************************************************* +* This function returns the ID of a particular string based on the +* supplied StringMap array +**************************************************************************/ +int gk_GetStringID(gk_StringMap_t *strmap, char *key) +{ + int i; + + for (i=0; strmap[i].name; i++) { + if (gk_strcasecmp(key, strmap[i].name)) + return strmap[i].id; + } + + return -1; +} diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/timers.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/timers.c new file mode 100644 index 00000000..bb8f2962 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/timers.c @@ -0,0 +1,52 @@ +/*! 
+\file timers.c +\brief Various timing functions + +\date Started 4/12/2007 +\author George +\version\verbatim $Id: timers.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + + +#include + + + + +/************************************************************************* +* This function returns the wall-clock seconds +**************************************************************************/ +double gk_WClockSeconds(void) +{ +#ifdef __GNUC__ + struct timeval ctime; + + gettimeofday(&ctime, NULL); + + return (double)ctime.tv_sec + (double).000001*ctime.tv_usec; +#else + return (double)time(NULL); +#endif +} + + +/************************************************************************* +* This function returns the CPU seconds +**************************************************************************/ +double gk_CPUSeconds(void) +{ +//#ifdef __OPENMP__ +#ifdef __OPENMPXXXX__ + return omp_get_wtime(); +#else + #if defined(WIN32) || defined(__MINGW32__) + return((double) clock()/CLOCKS_PER_SEC); + #else + struct rusage r; + + getrusage(RUSAGE_SELF, &r); + return ((r.ru_utime.tv_sec + r.ru_stime.tv_sec) + 1.0e-6*(r.ru_utime.tv_usec + r.ru_stime.tv_usec)); + #endif +#endif +} + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/tokenizer.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/tokenizer.c new file mode 100644 index 00000000..5efd262d --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/tokenizer.c @@ -0,0 +1,77 @@ +/*! +\file tokenizer.c +\brief String tokenization routines + +This file contains various routines for splitting an input string into +tokens and returning them in form of a list. The goal is to mimic perl's +split function. 
+ +\date Started 11/23/04 +\author George +\version\verbatim $Id: tokenizer.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + + +#include + + +/************************************************************************ +* This function tokenizes a string based on the user-supplied delimiters +* list. The resulting tokens are returned into an array of strings. +*************************************************************************/ +void gk_strtokenize(char *str, char *delim, gk_Tokens_t *tokens) +{ + int i, ntoks, slen; + + tokens->strbuf = gk_strdup(str); + + slen = strlen(str); + str = tokens->strbuf; + + /* Scan once to determine the number of tokens */ + for (ntoks=0, i=0; intoks = ntoks; + tokens->list = (char **)gk_malloc(ntoks*sizeof(char *), "strtokenize: tokens->list"); + + + /* Scan a second time to mark and link the tokens */ + for (ntoks=0, i=0; ilist[ntoks++] = str+i; + + /* Consume all the consecutive characters from the token */ + while (ilist, &tokens->strbuf, LTERM); +} + diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/win32/adapt.c b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/win32/adapt.c new file mode 100644 index 00000000..546857c5 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/win32/adapt.c @@ -0,0 +1,11 @@ +/* +\file win32/adapt.c +\brief Implementation of Win32 adaptation of libc functions +*/ + +#include "adapt.h" + +pid_t getpid(void) +{ + return GetCurrentProcessId(); +} diff --git a/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/win32/adapt.h b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/win32/adapt.h new file mode 100644 index 00000000..35e60ed6 --- /dev/null +++ b/Code/ThirdParty/gklib_svfsi/simvascular_gklib_svfsi/win32/adapt.h @@ -0,0 +1,14 @@ +/* +\file win32/adapt.h +\brief Declaration of Win32 adaptation of POSIX functions and types +*/ +#ifndef _WIN32_ADAPT_H_ +#define _WIN32_ADAPT_H_ + +#include + +typedef DWORD pid_t; + +pid_t getpid(void); + +#endif /* 
_WIN32_ADAPT_H_ */ diff --git a/Code/ThirdParty/metis_svfsi/README.md b/Code/ThirdParty/metis_svfsi/README.md new file mode 100644 index 00000000..a56397b7 --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/README.md @@ -0,0 +1,6 @@ + +The METIS library contains serial programs for partitioning graphs, partitioning finite element meshes, and +producing fill reducing orderings for sparse matrices. + +The v5.2 version of the source was downloaded from https://github.com/KarypisLab/METIS. + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/CMakeLists.txt b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/CMakeLists.txt index 86740b09..451d4583 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/CMakeLists.txt +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/CMakeLists.txt @@ -1,17 +1,14 @@ include_directories(./) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../gklib_svfsi/simvascular_gklib_svfsi) include_directories(${MPI_C_INCLUDE_PATH}) -set(CSRCS coarsen.c fm.c initpart.c match.c ccgraph.c - pmetis.c pqueue.c refine.c util.c timing.c debug.c - bucketsort.c graph.c stat.c kmetis.c kwayrefine.c - kwayfm.c balance.c ometis.c srefine.c sfm.c separator.c - mincover.c mmd.c mesh.c meshpart.c frename.c fortran.c - myqsort.c compress.c parmetis.c estmem.c - mpmetis.c mcoarsen.c mmatch.c minitpart.c mbalance.c - mutil.c mkmetis.c mkwayrefine.c mkwayfmh.c - mrefine2.c minitpart2.c mbalance2.c mfm2.c - kvmetis.c kwayvolrefine.c kwayvolfm.c subdomains.c - mfm.c memory.c mrefine.c checkgraph.c) +set(CSRCS auxapi.c balance.c bucketsort.c checkgraph.c coarsen.c + compress.c contig.c debug.c fm.c fortran.c frename.c gklib.c + graph.c initpart.c kmetis.c kwayfm.c kwayrefine.c mcutil.c + mesh.c meshpart.c minconn.c mincover.c mmd.c ometis.c + options.c parmetis.c pmetis.c refine.c separator.c sfm.c + srefine.c stat.c timing.c util.c wspace.c +) if(SV_USE_THIRDPARTY_SHARED_LIBRARIES) 
add_library(${METIS_SVFSI_LIBRARY_NAME} SHARED ${CSRCS}) diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/auxapi.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/auxapi.c new file mode 100644 index 00000000..bd7b48f8 --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/auxapi.c @@ -0,0 +1,43 @@ +/** +\file +\brief This file contains various helper API routines for using METIS. + +\date Started 5/12/2011 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: auxapi.c 10409 2011-06-25 16:58:34Z karypis $ \endverbatim +*/ + + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function frees memory that was allocated by METIS and returns + to the application. + + \param ptr points to the memory that was previously allocated by + METIS. +*/ +/*************************************************************************/ +int METIS_Free(void *ptr) +{ + if (ptr != NULL) free(ptr); + return METIS_OK; +} + + +/*************************************************************************/ +/*! This function sets the default values for the options. + + \param options points to an array of size at least METIS_NOPTIONS. 
+*/ +/*************************************************************************/ +int METIS_SetDefaultOptions(idx_t *options) +{ + iset(METIS_NOPTIONS, -1, options); + + return METIS_OK; +} + + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/balance.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/balance.c index ac951da7..4ff70bff 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/balance.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/balance.c @@ -1,104 +1,102 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * balance.c - * - * This file contains code that is used to forcefully balance either - * bisections or k-sections - * - * Started 7/29/97 - * George - * - * $Id: balance.c,v 1.1 2003/07/16 15:54:58 karypis Exp $ - * - */ - -#include +/*! +\file +\brief Functions for the edge-based balancing + +\date Started 7/23/97 +\author George +\author Copyright 1997-2011, Regents of the University of Minnesota +\version\verbatim $Id: balance.c 10187 2011-06-13 13:46:57Z karypis $ \endverbatim +*/ + +#include "metislib.h" /************************************************************************* -* This function is the entry point of the bisection balancing algorithms. +* This function is the entry poidx_t of the bisection balancing algorithms. 
**************************************************************************/ -void Balance2Way(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor) +void Balance2Way(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts) { - int i, j, nvtxs, from, imax, gain, mindiff; - idxtype *id, *ed; - - /* Return right away if the balance is OK */ - mindiff = abs(tpwgts[0]-graph->pwgts[0]); - if (mindiff < 3*(graph->pwgts[0]+graph->pwgts[1])/graph->nvtxs) - return; - if (graph->pwgts[0] > tpwgts[0] && graph->pwgts[0] < (int)(ubfactor*tpwgts[0])) - return; - if (graph->pwgts[1] > tpwgts[1] && graph->pwgts[1] < (int)(ubfactor*tpwgts[1])) + if (ComputeLoadImbalanceDiff(graph, 2, ctrl->pijbm, ctrl->ubfactors) <= 0) return; - if (graph->nbnd > 0) - Bnd2WayBalance(ctrl, graph, tpwgts); - else - General2WayBalance(ctrl, graph, tpwgts); + if (graph->ncon == 1) { + /* return right away if the balance is OK */ + if (rabs(ntpwgts[0]*graph->tvwgt[0]-graph->pwgts[0]) < 3*graph->tvwgt[0]/graph->nvtxs) + return; + if (graph->nbnd > 0) + Bnd2WayBalance(ctrl, graph, ntpwgts); + else + General2WayBalance(ctrl, graph, ntpwgts); + } + else { + McGeneral2WayBalance(ctrl, graph, ntpwgts); + } } - /************************************************************************* * This function balances two partitions by moving boundary nodes * from the domain that is overweight to the one that is underweight. 
**************************************************************************/ -void Bnd2WayBalance(CtrlType *ctrl, GraphType *graph, int *tpwgts) +void Bnd2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts) { - int i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, tmp; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; - idxtype *moved, *perm; - PQueueType parts; - int higain, oldgain, mincut, mindiff; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; + idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, tmp; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; + idx_t *moved, *perm; + rpq_t *queue; + idx_t higain, mincut, mindiff; + idx_t tpwgts[2]; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - id = graph->id; - ed = graph->ed; - pwgts = graph->pwgts; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; bndptr = graph->bndptr; bndind = graph->bndind; - moved = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); + moved = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); /* Determine from which domain you will be moving data */ - mindiff = abs(tpwgts[0]-pwgts[0]); - from = (pwgts[0] < tpwgts[0] ? 1 : 0); - to = (from+1)%2; + tpwgts[0] = graph->tvwgt[0]*ntpwgts[0]; + tpwgts[1] = graph->tvwgt[0] - tpwgts[0]; + mindiff = iabs(tpwgts[0]-pwgts[0]); + from = (pwgts[0] < tpwgts[0] ? 1 : 0); + to = (from+1)%2; - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d] T[%6d %6d], Nv-Nb[%6d %6d]. ICut: %6d [B]\n", - pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions: [%6"PRIDX" %6"PRIDX"] T[%6"PRIDX" %6"PRIDX"], Nv-Nb[%6"PRIDX" %6"PRIDX"]. 
ICut: %6"PRIDX" [B]\n", + pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, + graph->mincut)); - tmp = graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]; - PQueueInit(ctrl, &parts, nvtxs, tmp); + queue = rpqCreate(nvtxs); - idxset(nvtxs, -1, moved); + iset(nvtxs, -1, moved); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert the boundary nodes of the proper partition whose size is OK in the priority queue */ nbnd = graph->nbnd; - RandomPermute(nbnd, perm, 1); + irandArrayPermute(nbnd, perm, nbnd/5, 1); for (ii=0; ii 0 || id[bndind[i]] == 0); ASSERT(bndptr[bndind[i]] != -1); if (where[bndind[i]] == from && vwgt[bndind[i]] <= mindiff) - PQueueInsert(&parts, bndind[i], ed[bndind[i]]-id[bndind[i]]); + rpqInsert(queue, bndind[i], ed[bndind[i]]-id[bndind[i]]); } mincut = graph->mincut; for (nswaps=0; nswapsdbglvl, DBG_MOVEINFO, - printf("Moved %6d from %d. [%3d %3d] %5d [%4d %4d]\n", higain, from, ed[higain]-id[higain], vwgt[higain], mincut, pwgts[0], pwgts[1])); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" from %"PRIDX". 
[%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], mincut, pwgts[0], pwgts[1])); /************************************************************** * Update the id[i]/ed[i] values of the affected nodes @@ -123,8 +121,6 @@ void Bnd2WayBalance(CtrlType *ctrl, GraphType *graph, int *tpwgts) for (j=xadj[higain]; j 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1 && where[k] == from && vwgt[k] <= mindiff) - PQueueInsert(&parts, k, ed[k]-id[k]); + rpqInsert(queue, k, ed[k]-id[k]); } } } } - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\tMinimum cut: %6d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, pwgts[0], pwgts[1], nbnd)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum cut: %6"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, pwgts[0], pwgts[1], nbnd)); graph->mincut = mincut; - graph->nbnd = nbnd; + graph->nbnd = nbnd; - PQueueFree(ctrl, &parts); + rpqDestroy(queue); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); + WCOREPOP; } /************************************************************************* * This function balances two partitions by moving the highest gain * (including negative gain) vertices to the other domain. -* It is used only when tha unbalance is due to non contigous +* It is used only when the unbalance is due to non contiguous * subdomains. That is, the are no boundary vertices. * It moves vertices from the domain that is overweight to the one that * is underweight. 
**************************************************************************/ -void General2WayBalance(CtrlType *ctrl, GraphType *graph, int *tpwgts) +void General2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts) { - int i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, tmp; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; - idxtype *moved, *perm; - PQueueType parts; - int higain, oldgain, mincut, mindiff; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; + idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, tmp; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; + idx_t *moved, *perm; + rpq_t *queue; + idx_t higain, mincut, mindiff; + idx_t tpwgts[2]; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - id = graph->id; - ed = graph->ed; - pwgts = graph->pwgts; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; bndptr = graph->bndptr; bndind = graph->bndind; - moved = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); + moved = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); /* Determine from which domain you will be moving data */ - mindiff = abs(tpwgts[0]-pwgts[0]); - from = (pwgts[0] < tpwgts[0] ? 1 : 0); - to = (from+1)%2; - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d] T[%6d %6d], Nv-Nb[%6d %6d]. ICut: %6d [B]\n", + tpwgts[0] = graph->tvwgt[0]*ntpwgts[0]; + tpwgts[1] = graph->tvwgt[0] - tpwgts[0]; + mindiff = iabs(tpwgts[0]-pwgts[0]); + from = (pwgts[0] < tpwgts[0] ? 1 : 0); + to = (from+1)%2; + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions: [%6"PRIDX" %6"PRIDX"] T[%6"PRIDX" %6"PRIDX"], Nv-Nb[%6"PRIDX" %6"PRIDX"]. 
ICut: %6"PRIDX" [B]\n", pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); - tmp = graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]; - PQueueInit(ctrl, &parts, nvtxs, tmp); + queue = rpqCreate(nvtxs); - idxset(nvtxs, -1, moved); + iset(nvtxs, -1, moved); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert the nodes of the proper partition whose size is OK in the priority queue */ - RandomPermute(nvtxs, perm, 1); + irandArrayPermute(nvtxs, perm, nvtxs/5, 1); for (ii=0; iimincut; nbnd = graph->nbnd; for (nswaps=0; nswaps tpwgts[to]) @@ -234,8 +233,8 @@ void General2WayBalance(CtrlType *ctrl, GraphType *graph, int *tpwgts) where[higain] = to; moved[higain] = nswaps; - IFSET(ctrl->dbglvl, DBG_MOVEINFO, - printf("Moved %6d from %d. [%3d %3d] %5d [%4d %4d]\n", higain, from, ed[higain]-id[higain], vwgt[higain], mincut, pwgts[0], pwgts[1])); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" from %"PRIDX". [%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], mincut, pwgts[0], pwgts[1])); /************************************************************** * Update the id[i]/ed[i] values of the affected nodes @@ -248,14 +247,198 @@ void General2WayBalance(CtrlType *ctrl, GraphType *graph, int *tpwgts) for (j=xadj[higain]; j 0 && bndptr[k] == -1) + BNDInsert(nbnd, bndind, bndptr, k); + } + } + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum cut: %6"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, pwgts[0], pwgts[1], nbnd)); + + graph->mincut = mincut; + graph->nbnd = nbnd; + + rpqDestroy(queue); + + WCOREPOP; +} + + +/************************************************************************* +* This function performs an edge-based FM refinement +**************************************************************************/ +void McGeneral2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts) +{ + idx_t i, ii, 
j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, + me, limit, tmp, cnum; + idx_t *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts, *id, *ed, *bndptr, *bndind; + idx_t *moved, *swaps, *perm, *qnum, *qsizes; + idx_t higain, mincut, newcut, mincutorder; + real_t *invtvwgt, *minbalv, *newbalv, minbal, newbal; + rpq_t **queues; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + invtvwgt = graph->invtvwgt; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; + bndptr = graph->bndptr; + bndind = graph->bndind; + + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + qnum = iwspacemalloc(ctrl, nvtxs); + newbalv = rwspacemalloc(ctrl, ncon); + minbalv = rwspacemalloc(ctrl, ncon); + qsizes = iwspacemalloc(ctrl, 2*ncon); + + limit = gk_min(gk_max(0.01*nvtxs, 15), 100); + + /* Initialize the queues */ + queues = (rpq_t **)wspacemalloc(ctrl, 2*ncon*sizeof(rpq_t *)); + for (i=0; i<2*ncon; i++) { + queues[i] = rpqCreate(nvtxs); + qsizes[i] = 0; + } + + for (i=0; i qsizes[2*j+from] && + vwgt[i*ncon+qnum[i]]*invtvwgt[qnum[i]] < 1.3*vwgt[i*ncon+j]*invtvwgt[j]) { + qsizes[2*qnum[i]+from]--; + qsizes[2*j+from]++; + qnum[i] = j; + } + } + } + } + } + + + minbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, minbalv); + ASSERT(minbal > 0.0); + + newcut = mincut = graph->mincut; + mincutorder = -1; + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("Parts: ["); + for (l=0; lnvtxs, graph->nbnd, graph->mincut, minbal); + } + + iset(nvtxs, -1, moved); + + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERT(CheckBnd(graph)); + + /* Insert all nodes in the priority queues */ + nbnd = graph->nbnd; + irandArrayPermute(nvtxs, perm, nvtxs/10, 1); + for (ii=0; iipijbm, ctrl->ubfactors, queues, &from, &cnum); + to = (from+1)%2; + + if (from == -1 || (higain = 
rpqGetTop(queues[2*cnum+from])) == -1) + break; + + newcut -= (ed[higain]-id[higain]); + + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1); + newbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, newbalv); + + if (newbal < minbal || (newbal == minbal && + (newcut < mincut || + (newcut == mincut && BetterBalance2Way(ncon, minbalv, newbalv))))) { + mincut = newcut; + minbal = newbal; + mincutorder = nswaps; + rcopy(ncon, newbalv, minbalv); + } + else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ + newcut += (ed[higain]-id[higain]); + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + break; + } + + where[higain] = to; + moved[higain] = nswaps; + swaps[nswaps] = higain; + + if (ctrl->dbglvl&METIS_DBG_MOVEINFO) { + printf("Moved %6"PRIDX" from %"PRIDX"(%"PRIDX"). Gain: %5"PRIDX", " + "Cut: %5"PRIDX", NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); + for (l=0; l 0 && bndptr[higain] == -1) + BNDInsert(nbnd, bndind, bndptr, higain); + + for (j=xadj[higain]; jdbglvl, DBG_REFINE, - printf("\tMinimum cut: %6d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, pwgts[0], pwgts[1], nbnd)); + + + /**************************************************************** + * Roll back computations + *****************************************************************/ + for (nswaps--; nswaps>mincutorder; nswaps--) { + higain = swaps[nswaps]; + + to = where[higain] = (where[higain]+1)%2; + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + else if (ed[higain] > 0 && bndptr[higain] == -1) + BNDInsert(nbnd, bndind, bndptr, higain); + + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+((to+1)%2)*ncon, 1); + for (j=xadj[higain]; j 0) + BNDInsert(nbnd, 
bndind, bndptr, k); + } + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("\tMincut: %6"PRIDX" at %5"PRIDX", NBND: %6"PRIDX", NPwgts: [", + mincut, mincutorder, nbnd); + for (l=0; lpijbm)); + } graph->mincut = mincut; - graph->nbnd = nbnd; + graph->nbnd = nbnd; + - PQueueFree(ctrl, &parts); + for (i=0; i<2*ncon; i++) + rpqDestroy(queues[i]); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); + WCOREPOP; } + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/bucketsort.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/bucketsort.c index 0e9366e8..e126d02a 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/bucketsort.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/bucketsort.c @@ -9,25 +9,26 @@ * Started 7/25/97 * George * - * $Id: bucketsort.c,v 1.1 2003/07/16 15:55:00 karypis Exp $ - * */ -#include +#include "metislib.h" /************************************************************************* * This function uses simple counting sort to return a permutation array -* corresponding to the sorted order. The keys are assumed to start from +* corresponding to the sorted order. The keys are arsumed to start from * 0 and they are positive. This sorting is used during matching. 
**************************************************************************/ -void BucketSortKeysInc(int n, int max, idxtype *keys, idxtype *tperm, idxtype *perm) +void BucketSortKeysInc(ctrl_t *ctrl, idx_t n, idx_t max, idx_t *keys, + idx_t *tperm, idx_t *perm) { - int i, ii; - idxtype *counts; + idx_t i, ii; + idx_t *counts; + + WCOREPUSH; - counts = idxsmalloc(max+2, 0, "BucketSortKeysInc: counts"); + counts = iset(max+2, 0, iwspacemalloc(ctrl, max+2)); for (i=0; i - - - -/************************************************************************* -* This function creates the coarser graph -**************************************************************************/ -void CreateCoarseGraph(CtrlType *ctrl, GraphType *graph, int cnvtxs, idxtype *match, idxtype *perm) -{ - int i, j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, mask, dovsize; - idxtype *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *adjwgtsum, *auxadj; - idxtype *cmap, *htable; - idxtype *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt, *cadjwgtsum; - float *nvwgt, *cnvwgt; - GraphType *cgraph; - - dovsize = (ctrl->optype == OP_KVMETIS ? 
1 : 0); - - mask = HTLENGTH; - if (cnvtxs < 8*mask || graph->nedges/graph->nvtxs > 15) { - CreateCoarseGraphNoMask(ctrl, graph, cnvtxs, match, perm); - return; - } - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr)); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - vwgt = graph->vwgt; - vsize = graph->vsize; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; - cmap = graph->cmap; - - /* Initialize the coarser graph */ - cgraph = SetUpCoarseGraph(graph, cnvtxs, dovsize); - cxadj = cgraph->xadj; - cvwgt = cgraph->vwgt; - cvsize = cgraph->vsize; - cnvwgt = cgraph->nvwgt; - cadjwgtsum = cgraph->adjwgtsum; - cadjncy = cgraph->adjncy; - cadjwgt = cgraph->adjwgt; - - - iend = xadj[nvtxs]; - auxadj = ctrl->wspace.auxcore; - memcpy(auxadj, adjncy, iend*sizeof(idxtype)); - for (i=0; i= 0 && cadjncy[jj] != cnvtxs) { - for (jj=0; jj= 0 && cadjncy[jj] == cnvtxs) { /* This 2nd check is needed for non-adjacent matchings */ - cadjwgtsum[cnvtxs] -= cadjwgt[jj]; - cadjncy[jj] = cadjncy[--nedges]; - cadjwgt[jj] = cadjwgt[nedges]; - } - } - - ASSERTP(cadjwgtsum[cnvtxs] == idxsum(nedges, cadjwgt), ("%d %d %d %d %d\n", cnvtxs, cadjwgtsum[cnvtxs], idxsum(nedges, cadjwgt), adjwgtsum[u], adjwgtsum[v])); - - for (j=0; jnedges = cnedges; - - ReAdjustMemory(graph, cgraph, dovsize); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr)); - - idxwspacefree(ctrl, mask+1); - -} - - -/************************************************************************* -* This function creates the coarser graph -**************************************************************************/ -void CreateCoarseGraphNoMask(CtrlType *ctrl, GraphType *graph, int cnvtxs, idxtype *match, idxtype *perm) -{ - int i, j, k, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, dovsize; - idxtype *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *adjwgtsum, *auxadj; - idxtype *cmap, *htable; - idxtype *cxadj, *cvwgt, *cvsize, *cadjncy, 
*cadjwgt, *cadjwgtsum; - float *nvwgt, *cnvwgt; - GraphType *cgraph; - - dovsize = (ctrl->optype == OP_KVMETIS ? 1 : 0); - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr)); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - vwgt = graph->vwgt; - vsize = graph->vsize; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; - cmap = graph->cmap; - - - /* Initialize the coarser graph */ - cgraph = SetUpCoarseGraph(graph, cnvtxs, dovsize); - cxadj = cgraph->xadj; - cvwgt = cgraph->vwgt; - cvsize = cgraph->vsize; - cnvwgt = cgraph->nvwgt; - cadjwgtsum = cgraph->adjwgtsum; - cadjncy = cgraph->adjncy; - cadjwgt = cgraph->adjwgt; - - - htable = idxset(cnvtxs, -1, idxwspacemalloc(ctrl, cnvtxs)); - - iend = xadj[nvtxs]; - auxadj = ctrl->wspace.auxcore; - memcpy(auxadj, adjncy, iend*sizeof(idxtype)); - for (i=0; inedges = cnedges; - - ReAdjustMemory(graph, cgraph, dovsize); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr)); - - idxwspacefree(ctrl, cnvtxs); -} - - -/************************************************************************* -* This function creates the coarser graph -**************************************************************************/ -void CreateCoarseGraph_NVW(CtrlType *ctrl, GraphType *graph, int cnvtxs, idxtype *match, idxtype *perm) -{ - int i, j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, mask; - idxtype *xadj, *adjncy, *adjwgtsum, *auxadj; - idxtype *cmap, *htable; - idxtype *cxadj, *cvwgt, *cadjncy, *cadjwgt, *cadjwgtsum; - float *nvwgt, *cnvwgt; - GraphType *cgraph; - - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr)); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgtsum = graph->adjwgtsum; - cmap = graph->cmap; - - /* Initialize the coarser graph */ - cgraph = SetUpCoarseGraph(graph, cnvtxs, 0); - cxadj = cgraph->xadj; - cvwgt = 
cgraph->vwgt; - cnvwgt = cgraph->nvwgt; - cadjwgtsum = cgraph->adjwgtsum; - cadjncy = cgraph->adjncy; - cadjwgt = cgraph->adjwgt; - - - iend = xadj[nvtxs]; - auxadj = ctrl->wspace.auxcore; - memcpy(auxadj, adjncy, iend*sizeof(idxtype)); - for (i=0; i= 0 && cadjncy[jj] != cnvtxs) { - for (jj=0; jj= 0 && cadjncy[jj] == cnvtxs) { /* This 2nd check is needed for non-adjacent matchings */ - cadjwgtsum[cnvtxs] -= cadjwgt[jj]; - cadjncy[jj] = cadjncy[--nedges]; - cadjwgt[jj] = cadjwgt[nedges]; - } - } - - ASSERTP(cadjwgtsum[cnvtxs] == idxsum(nedges, cadjwgt), ("%d %d %d %d %d\n", cnvtxs, cadjwgtsum[cnvtxs], idxsum(nedges, cadjwgt), adjwgtsum[u], adjwgtsum[v])); - - for (j=0; jnedges = cnedges; - - ReAdjustMemory(graph, cgraph, 0); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr)); - - idxwspacefree(ctrl, mask+1); - -} - - -/************************************************************************* -* Setup the various arrays for the coarse graph -**************************************************************************/ -GraphType *SetUpCoarseGraph(GraphType *graph, int cnvtxs, int dovsize) -{ - GraphType *cgraph; - - cgraph = CreateGraph(); - cgraph->nvtxs = cnvtxs; - cgraph->ncon = graph->ncon; - - cgraph->finer = graph; - graph->coarser = cgraph; - - - /* Allocate memory for the coarser graph */ - if (graph->ncon == 1) { - if (dovsize) { - cgraph->gdata = idxmalloc(5*cnvtxs+1 + 2*graph->nedges, "SetUpCoarseGraph: gdata"); - cgraph->xadj = cgraph->gdata; - cgraph->vwgt = cgraph->gdata + cnvtxs+1; - cgraph->vsize = cgraph->gdata + 2*cnvtxs+1; - cgraph->adjwgtsum = cgraph->gdata + 3*cnvtxs+1; - cgraph->cmap = cgraph->gdata + 4*cnvtxs+1; - cgraph->adjncy = cgraph->gdata + 5*cnvtxs+1; - cgraph->adjwgt = cgraph->gdata + 5*cnvtxs+1 + graph->nedges; - } - else { - cgraph->gdata = idxmalloc(4*cnvtxs+1 + 2*graph->nedges, "SetUpCoarseGraph: gdata"); - cgraph->xadj = cgraph->gdata; - cgraph->vwgt = cgraph->gdata + cnvtxs+1; - cgraph->adjwgtsum = cgraph->gdata + 
2*cnvtxs+1; - cgraph->cmap = cgraph->gdata + 3*cnvtxs+1; - cgraph->adjncy = cgraph->gdata + 4*cnvtxs+1; - cgraph->adjwgt = cgraph->gdata + 4*cnvtxs+1 + graph->nedges; - } - } - else { - if (dovsize) { - cgraph->gdata = idxmalloc(4*cnvtxs+1 + 2*graph->nedges, "SetUpCoarseGraph: gdata"); - cgraph->xadj = cgraph->gdata; - cgraph->vsize = cgraph->gdata + cnvtxs+1; - cgraph->adjwgtsum = cgraph->gdata + 2*cnvtxs+1; - cgraph->cmap = cgraph->gdata + 3*cnvtxs+1; - cgraph->adjncy = cgraph->gdata + 4*cnvtxs+1; - cgraph->adjwgt = cgraph->gdata + 4*cnvtxs+1 + graph->nedges; - } - else { - cgraph->gdata = idxmalloc(3*cnvtxs+1 + 2*graph->nedges, "SetUpCoarseGraph: gdata"); - cgraph->xadj = cgraph->gdata; - cgraph->adjwgtsum = cgraph->gdata + cnvtxs+1; - cgraph->cmap = cgraph->gdata + 2*cnvtxs+1; - cgraph->adjncy = cgraph->gdata + 3*cnvtxs+1; - cgraph->adjwgt = cgraph->gdata + 3*cnvtxs+1 + graph->nedges; - } - - cgraph->nvwgt = fmalloc(graph->ncon*cnvtxs, "SetUpCoarseGraph: nvwgt"); - } - - return cgraph; -} - - -/************************************************************************* -* This function re-adjusts the amount of memory that was allocated if -* it will lead to significant savings -**************************************************************************/ -void ReAdjustMemory(GraphType *graph, GraphType *cgraph, int dovsize) -{ - - if (cgraph->nedges > 100000 && graph->nedges < 0.7*graph->nedges) { - idxcopy(cgraph->nedges, cgraph->adjwgt, cgraph->adjncy+cgraph->nedges); - - if (graph->ncon == 1) { - if (dovsize) { - cgraph->gdata = (idxtype *)realloc(cgraph->gdata, (5*cgraph->nvtxs+1 + 2*cgraph->nedges)*sizeof(idxtype)); - - /* Do this, in case everything was copied into new space */ - cgraph->xadj = cgraph->gdata; - cgraph->vwgt = cgraph->gdata + cgraph->nvtxs+1; - cgraph->vsize = cgraph->gdata + 2*cgraph->nvtxs+1; - cgraph->adjwgtsum = cgraph->gdata + 3*cgraph->nvtxs+1; - cgraph->cmap = cgraph->gdata + 4*cgraph->nvtxs+1; - cgraph->adjncy = cgraph->gdata + 
5*cgraph->nvtxs+1; - cgraph->adjwgt = cgraph->gdata + 5*cgraph->nvtxs+1 + cgraph->nedges; - } - else { - cgraph->gdata = (idxtype *)realloc(cgraph->gdata, (4*cgraph->nvtxs+1 + 2*cgraph->nedges)*sizeof(idxtype)); - - /* Do this, in case everything was copied into new space */ - cgraph->xadj = cgraph->gdata; - cgraph->vwgt = cgraph->gdata + cgraph->nvtxs+1; - cgraph->adjwgtsum = cgraph->gdata + 2*cgraph->nvtxs+1; - cgraph->cmap = cgraph->gdata + 3*cgraph->nvtxs+1; - cgraph->adjncy = cgraph->gdata + 4*cgraph->nvtxs+1; - cgraph->adjwgt = cgraph->gdata + 4*cgraph->nvtxs+1 + cgraph->nedges; - } - } - else { - if (dovsize) { - cgraph->gdata = (idxtype *)realloc(cgraph->gdata, (4*cgraph->nvtxs+1 + 2*cgraph->nedges)*sizeof(idxtype)); - - /* Do this, in case everything was copied into new space */ - cgraph->xadj = cgraph->gdata; - cgraph->vsize = cgraph->gdata + cgraph->nvtxs+1; - cgraph->adjwgtsum = cgraph->gdata + 2*cgraph->nvtxs+1; - cgraph->cmap = cgraph->gdata + 3*cgraph->nvtxs+1; - cgraph->adjncy = cgraph->gdata + 4*cgraph->nvtxs+1; - cgraph->adjwgt = cgraph->gdata + 4*cgraph->nvtxs+1 + cgraph->nedges; - } - else { - cgraph->gdata = (idxtype *)realloc(cgraph->gdata, (3*cgraph->nvtxs+1 + 2*cgraph->nedges)*sizeof(idxtype)); - - /* Do this, in case everything was copied into new space */ - cgraph->xadj = cgraph->gdata; - cgraph->adjwgtsum = cgraph->gdata + cgraph->nvtxs+1; - cgraph->cmap = cgraph->gdata + 2*cgraph->nvtxs+1; - cgraph->adjncy = cgraph->gdata + 3*cgraph->nvtxs+1; - cgraph->adjwgt = cgraph->gdata + 3*cgraph->nvtxs+1 + cgraph->nedges; - } - } - } - -} diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/checkgraph.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/checkgraph.c index d7f5a29f..77f1e358 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/checkgraph.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/checkgraph.c @@ -8,78 +8,86 @@ * Started 8/28/94 * George * - * $Id: 
checkgraph.c,v 1.1 2003/07/24 18:39:06 karypis Exp $ - * */ -#include +#include "metislib.h" + +/*************************************************************************/ +/*! This function checks if a graph is valid. A valid graph must satisfy + the following constraints: + - It should contain no self-edges. + - It should be undirected; i.e., (u,v) and (v,u) should be present. + - The adjacency list should not contain multiple edges to the same + other vertex. -/************************************************************************* -* This function checks if a graph is valid -**************************************************************************/ -int CheckGraph(GraphType *graph) + \param graph is the graph to be checked, whose numbering starts from 0. + \param numflag is 0 if error reporting will be done using 0 as the + numbering, or 1 if the reporting should be done using 1. + \param verbose is 1 the identified errors will be displayed, or 0, if + it should run silently. +*/ +/*************************************************************************/ +int CheckGraph(graph_t *graph, int numflag, int verbose) { - int i, j, k, l; - int nvtxs, ncon, err=0; - int minedge, maxedge, minewgt, maxewgt; - float minvwgt[MAXNCON], maxvwgt[MAXNCON]; - idxtype *xadj, *adjncy, *adjwgt, *htable; - float *nvwgt, ntvwgts[MAXNCON]; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; + idx_t i, j, k, l; + idx_t nvtxs, err=0; + idx_t minedge, maxedge, minewgt, maxewgt; + idx_t *xadj, *adjncy, *adjwgt, *htable; + + numflag = (numflag == 0 ? 
0 : 1); /* make sure that numflag is 0 or 1 */ + + nvtxs = graph->nvtxs; + xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - htable = idxsmalloc(nvtxs, 0, "htable"); + htable = ismalloc(nvtxs, 0, "htable"); - if (ncon > 1) { - for (j=0; jnedges > 0) { + minedge = maxedge = adjncy[0]; + if (adjwgt) + minewgt = maxewgt = adjwgt[0]; } - minedge = maxedge = adjncy[0]; - minewgt = maxewgt = adjwgt[0]; - for (i=0; i 1) { - for (j=0; j maxvwgt[j]) ? nvwgt[i*ncon+j] : maxvwgt[j]; - } - } - for (j=xadj[i]; j maxedge) ? k : maxedge; - minewgt = (adjwgt[j] < minewgt) ? adjwgt[j] : minewgt; - maxewgt = (adjwgt[j] > maxewgt) ? adjwgt[j] : maxewgt; + if (adjwgt) { + minewgt = (adjwgt[j] < minewgt) ? adjwgt[j] : minewgt; + maxewgt = (adjwgt[j] > maxewgt) ? adjwgt[j] : maxewgt; + } if (i == k) { - printf("Vertex %d contains a self-loop (i.e., diagonal entry in the matrix)!\n", i); + if (verbose) + printf("Vertex %"PRIDX" contains a self-loop " + "(i.e., diagonal entry in the matrix)!\n", i+numflag); err++; } else { for (l=xadj[k]; l 0 && verbose) { + printf("A total of %"PRIDX" errors exist in the input file. " + "Correct them, and run again!\n", err); + } + + gk_free((void **)&htable, LTERM); + + return (err == 0 ? 1 : 0); +} + + +/*************************************************************************/ +/*! 
This function performs a quick check of the weights of the graph */ +/*************************************************************************/ +int CheckInputGraphWeights(idx_t nvtxs, idx_t ncon, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt) +{ + idx_t i; + + if (ncon <= 0) { + printf("Input Error: ncon must be >= 1.\n"); + return 0; + } + + if (vwgt) { + for (i=ncon*nvtxs; i>=0; i--) { + if (vwgt[i] < 0) { + printf("Input Error: negative vertex weight(s).\n"); + return 0; + } + } + } + if (vsize) { + for (i=nvtxs; i>=0; i--) { + if (vsize[i] < 0) { + printf("Input Error: negative vertex sizes(s).\n"); + return 0; + } + } + } + if (adjwgt) { + for (i=xadj[nvtxs]-1; i>=0; i--) { + if (adjwgt[i] < 0) { + printf("Input Error: non-positive edge weight(s).\n"); + return 0; + } } } - if (ncon > 1) { - for (j=0; j 0.0001) { - printf("Normalized vwgts don't sum to one. Weight %d = %.8f.\n", j, ntvwgts[j]); - err++; + return 1; +} + + +/*************************************************************************/ +/*! This function creates a graph whose topology is consistent with + Metis' requirements that: + - There are no self-edges. + - It is undirected; i.e., (u,v) and (v,u) should be present and of the + same weight. + - The adjacency list should not contain multiple edges to the same + other vertex. + + Any of the above errors are fixed by performing the following operations: + - Self-edges are removed. + - The undirected graph is formed by the union of edges. + - One of the duplicate edges is selected. + + The routine does not change the provided vertex weights. 
+*/ +/*************************************************************************/ +graph_t *FixGraph(graph_t *graph) +{ + idx_t i, j, k, l, nvtxs, nedges; + idx_t *xadj, *adjncy, *adjwgt; + idx_t *nxadj, *nadjncy, *nadjwgt; + graph_t *ngraph; + uvw_t *edges; + + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + ASSERT(adjwgt != NULL); + + ngraph = CreateGraph(); + + ngraph->nvtxs = nvtxs; + + /* deal with vertex weights/sizes */ + ngraph->ncon = graph->ncon; + ngraph->vwgt = icopy(nvtxs*graph->ncon, graph->vwgt, + imalloc(nvtxs*graph->ncon, "FixGraph: vwgt")); + + ngraph->vsize = ismalloc(nvtxs, 1, "FixGraph: vsize"); + if (graph->vsize) + icopy(nvtxs, graph->vsize, ngraph->vsize); + + /* fix graph by sorting the "superset" of edges */ + edges = (uvw_t *)gk_malloc(sizeof(uvw_t)*2*xadj[nvtxs], "FixGraph: edges"); + + for (nedges=0, i=0; i adjncy[j]) { + edges[nedges].u = adjncy[j]; + edges[nedges].v = i; + edges[nedges].w = adjwgt[j]; + nedges++; } } } -/* - printf("errs: %d, adjncy: [%d %d], adjwgt: [%d %d]\n", - err, minedge, maxedge, minewgt, maxewgt); - if (ncon > 1) { - for (j=0; j 0) { - printf("A total of %d errors exist in the input file. 
Correct them, and run again!\n", err); + nedges = k+1; + + /* allocate memory for the fixed graph */ + nxadj = ngraph->xadj = ismalloc(nvtxs+1, 0, "FixGraph: nxadj"); + nadjncy = ngraph->adjncy = imalloc(2*nedges, "FixGraph: nadjncy"); + nadjwgt = ngraph->adjwgt = imalloc(2*nedges, "FixGraph: nadjwgt"); + + /* create the adjacency list of the fixed graph from the upper-triangular + part of the adjacency matrix */ + for (k=0; kdbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->CoarsenTmr)); + + /* determine if the weights on the edges are all the same */ + for (eqewgts=1, i=1; inedges; i++) { + if (graph->adjwgt[0] != graph->adjwgt[i]) { + eqewgts = 0; + break; + } + } + + /* set the maximum allowed coarsest vertex weight */ + for (i=0; incon; i++) + ctrl->maxvwgt[i] = 1.5*graph->tvwgt[i]/ctrl->CoarsenTo; + + do { + IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph)); + + /* allocate memory for cmap, if it has not already been done due to + multiple cuts */ + if (graph->cmap == NULL) + graph->cmap = imalloc(graph->nvtxs, "CoarsenGraph: graph->cmap"); + + /* determine which matching scheme you will use */ + switch (ctrl->ctype) { + case METIS_CTYPE_RM: + Match_RM(ctrl, graph); + break; + case METIS_CTYPE_SHEM: + if (eqewgts || graph->nedges == 0) + Match_RM(ctrl, graph); + else + Match_SHEM(ctrl, graph); + break; + default: + gk_errexit(SIGERR, "Unknown ctype: %d\n", ctrl->ctype); + } + + graph_WriteToDisk(ctrl, graph); + + graph = graph->coarser; + eqewgts = 0; + level++; + + ASSERT(CheckGraph(graph, 0, 1)); + + } while (graph->nvtxs > ctrl->CoarsenTo && + graph->nvtxs < COARSEN_FRACTION*graph->finer->nvtxs && + graph->nedges > graph->nvtxs/2); + + IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->CoarsenTmr)); + + return graph; +} + + +/*************************************************************************/ +/*! 
This function takes a graph and creates a sequence of nlevels coarser + graphs, where nlevels is an input parameter. */ +/*************************************************************************/ +graph_t *CoarsenGraphNlevels(ctrl_t *ctrl, graph_t *graph, idx_t nlevels) +{ + idx_t i, eqewgts, level; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->CoarsenTmr)); + + /* determine if the weights on the edges are all the same */ + for (eqewgts=1, i=1; inedges; i++) { + if (graph->adjwgt[0] != graph->adjwgt[i]) { + eqewgts = 0; + break; + } + } + + /* set the maximum allowed coarsest vertex weight */ + for (i=0; incon; i++) + ctrl->maxvwgt[i] = 1.5*graph->tvwgt[i]/ctrl->CoarsenTo; + + for (level=0; leveldbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph)); + + /* allocate memory for cmap, if it has not already been done due to + multiple cuts */ + if (graph->cmap == NULL) + graph->cmap = imalloc(graph->nvtxs, "CoarsenGraph: graph->cmap"); -#include + /* determine which matching scheme you will use */ + switch (ctrl->ctype) { + case METIS_CTYPE_RM: + Match_RM(ctrl, graph); + break; + case METIS_CTYPE_SHEM: + if (eqewgts || graph->nedges == 0) + Match_RM(ctrl, graph); + else + Match_SHEM(ctrl, graph); + break; + default: + gk_errexit(SIGERR, "Unknown ctype: %d\n", ctrl->ctype); + } + + graph_WriteToDisk(ctrl, graph); + + graph = graph->coarser; + eqewgts = 0; + + ASSERT(CheckGraph(graph, 0, 1)); + + if (graph->nvtxs < ctrl->CoarsenTo || + graph->nvtxs > COARSEN_FRACTION*graph->finer->nvtxs || + graph->nedges < graph->nvtxs/2) + break; + } + IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->CoarsenTmr)); -/************************************************************************* -* This function takes a graph and creates a sequence of coarser graphs -**************************************************************************/ -GraphType *Coarsen2Way(CtrlType *ctrl, GraphType 
*graph) + return graph; +} + + +/*************************************************************************/ +/*! This function finds a matching by randomly selecting one of the + unmatched adjacent vertices. + */ +/**************************************************************************/ +idx_t Match_RM(ctrl_t *ctrl, graph_t *graph) { - int clevel; - GraphType *cgraph; + idx_t i, pi, ii, j, jj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx, + last_unmatched, avgdegree, bnum; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt; + idx_t *match, *cmap, *degrees, *perm, *tperm; + size_t nunmatched=0; - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->CoarsenTmr)); + WCOREPUSH; - cgraph = graph; + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr)); - /* The following is ahack to allow the multiple bisections to go through with correct - coarsening */ - if (ctrl->CType > 20) { - clevel = 1; - ctrl->CType -= 20; + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + cmap = graph->cmap; + + maxvwgt = ctrl->maxvwgt; + + match = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs)); + perm = iwspacemalloc(ctrl, nvtxs); + tperm = iwspacemalloc(ctrl, nvtxs); + degrees = iwspacemalloc(ctrl, nvtxs); + + /* Determine a "random" traversal order that is biased towards + low-degree vertices */ + irandArrayPermute(nvtxs, tperm, nvtxs/8, 1); + + avgdegree = 4.0*(xadj[nvtxs]/nvtxs); + for (i=0; i avgdegree ? avgdegree : bnum); } - else - clevel = 0; + BucketSortKeysInc(ctrl, nvtxs, avgdegree, degrees, tperm, perm); - do { - IFSET(ctrl->dbglvl, DBG_COARSEN, printf("%6d %7d [%d] [%d %d]\n", - cgraph->nvtxs, cgraph->nedges, ctrl->CoarsenTo, ctrl->maxvwgt, - (cgraph->vwgt ? 
idxsum(cgraph->nvtxs, cgraph->vwgt) : cgraph->nvtxs))); - - if (cgraph->adjwgt) { - switch (ctrl->CType) { - case MATCH_RM: - Match_RM(ctrl, cgraph); - break; - case MATCH_HEM: - if (clevel < 1 || cgraph->nedges == 0) - Match_RM(ctrl, cgraph); - else - Match_HEM(ctrl, cgraph); - break; - case MATCH_SHEM: - if (clevel < 1 || cgraph->nedges == 0) - Match_RM(ctrl, cgraph); - else - Match_SHEM(ctrl, cgraph); - break; - case MATCH_SHEMKWAY: - if (cgraph->nedges == 0) - Match_RM(ctrl, cgraph); - else - Match_SHEM(ctrl, cgraph); + + /* Traverse the vertices and compute the matching */ + for (cnvtxs=0, last_unmatched=0, pi=0; pimaxvwgt requirements */ + if (xadj[i] == xadj[i+1]) { + last_unmatched = gk_max(pi, last_unmatched)+1; + for (; last_unmatchedno2hop && nunmatched > UNMATCHEDFOR2HOP*nvtxs) + cnvtxs = Match_2Hop(ctrl, graph, perm, match, cnvtxs, nunmatched); + + + /* match the final unmatched vertices with themselves and reorder the vertices + of the coarse graph for memory-friendly contraction */ + for (cnvtxs=0, i=0; idbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr)); + + CreateCoarseGraph(ctrl, graph, cnvtxs, match); + + WCOREPOP; + + return cnvtxs; +} + + +/**************************************************************************/ +/*! This function finds a matching using the HEM heuristic. The vertices + are visited based on increasing degree to ensure that all vertices are + given a chance to match with something. 
+ */ +/**************************************************************************/ +idx_t Match_SHEM(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, pi, ii, j, jj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx, maxwgt, + last_unmatched, avgdegree, bnum; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt; + idx_t *match, *cmap, *degrees, *perm, *tperm; + size_t nunmatched=0; + + WCOREPUSH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr)); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + cmap = graph->cmap; + + maxvwgt = ctrl->maxvwgt; + + match = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs)); + perm = iwspacemalloc(ctrl, nvtxs); + tperm = iwspacemalloc(ctrl, nvtxs); + degrees = iwspacemalloc(ctrl, nvtxs); + + /* Determine a "random" traversal order that is biased towards low-degree vertices */ + irandArrayPermute(nvtxs, tperm, nvtxs/8, 1); + + avgdegree = 4.0*(xadj[nvtxs]/nvtxs); + for (i=0; i avgdegree ? avgdegree : bnum); + } + BucketSortKeysInc(ctrl, nvtxs, avgdegree, degrees, tperm, perm); + + + /* Traverse the vertices and compute the matching */ + for (cnvtxs=0, last_unmatched=0, pi=0; pimaxvwgt requirements */ + if (xadj[i] == xadj[i+1]) { + last_unmatched = gk_max(pi, last_unmatched)+1; + for (; last_unmatchedinvtvwgt, vwgt+i*ncon, + vwgt+maxidx*ncon, vwgt+k*ncon)))) { + maxidx = k; + maxwgt = adjwgt[j]; + } + } + + /* If it did not match, record for a 2-hop matching. 
*/ + if (maxidx == i && ivecaxpylez(ncon, 2, vwgt+i*ncon, vwgt+i*ncon, maxvwgt)) { + nunmatched++; + maxidx = UNMATCHED; + } + } + } + } + + if (maxidx != UNMATCHED) { + cmap[i] = cmap[maxidx] = cnvtxs++; + match[i] = maxidx; + match[maxidx] = i; + } + } + } + + //printf("nunmatched: %zu\n", nunmatched); + + /* see if a 2-hop matching is required/allowed */ + if (!ctrl->no2hop && nunmatched > UNMATCHEDFOR2HOP*nvtxs) + cnvtxs = Match_2Hop(ctrl, graph, perm, match, cnvtxs, nunmatched); + + + /* match the final unmatched vertices with themselves and reorder the vertices + of the coarse graph for memory-friendly contraction */ + for (cnvtxs=0, i=0; idbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr)); + + CreateCoarseGraph(ctrl, graph, cnvtxs, match); + + WCOREPOP; + + return cnvtxs; +} + + +/*************************************************************************/ +/*! This function matches the unmatched vertices using a 2-hop matching + that involves vertices that are two hops away from each other. */ +/**************************************************************************/ +idx_t Match_2Hop(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t nunmatched) +{ + + cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, 2); + cnvtxs = Match_2HopAll(ctrl, graph, perm, match, cnvtxs, &nunmatched, 64); + if (nunmatched > 1.5*UNMATCHEDFOR2HOP*graph->nvtxs) + cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, 3); + if (nunmatched > 2.0*UNMATCHEDFOR2HOP*graph->nvtxs) + cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, graph->nvtxs); + + return cnvtxs; +} + + +/*************************************************************************/ +/*! This function matches the unmatched vertices whose degree is less than + maxdegree using a 2-hop matching that involves vertices that are two + hops away from each other. 
+ The requirement of the 2-hop matching is a simple non-empty overlap + between the adjancency lists of the vertices. */ +/**************************************************************************/ +idx_t Match_2HopAny(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree) +{ + idx_t i, pi, ii, j, jj, k, nvtxs; + idx_t *xadj, *adjncy, *colptr, *rowind; + idx_t *cmap; + size_t nunmatched; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr)); + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + cmap = graph->cmap; + + nunmatched = *r_nunmatched; + + /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("IN: nunmatched: %zu\t", nunmatched)); */ + + /* create the inverted index */ + WCOREPUSH; + colptr = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs+1)); + for (i=0; ij; jj--) { + if (match[rowind[jj]] == UNMATCHED) { + cmap[rowind[j]] = cmap[rowind[jj]] = cnvtxs++; + match[rowind[j]] = rowind[jj]; + match[rowind[jj]] = rowind[j]; + nunmatched -= 2; + break; + } + } + } + } + } + WCOREPOP; + + /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("OUT: nunmatched: %zu\n", nunmatched)); */ + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr)); + + *r_nunmatched = nunmatched; + return cnvtxs; +} + + +/*************************************************************************/ +/*! This function matches the unmatched vertices whose degree is less than + maxdegree using a 2-hop matching that involves vertices that are two + hops away from each other. + The requirement of the 2-hop matching is that of identical adjacency + lists. 
+ */ +/**************************************************************************/ +idx_t Match_2HopAll(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree) +{ + idx_t i, pi, pk, ii, j, jj, k, nvtxs, mask, idegree; + idx_t *xadj, *adjncy; + idx_t *cmap, *mark; + ikv_t *keys; + size_t nunmatched, ncand; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr)); + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + cmap = graph->cmap; + + nunmatched = *r_nunmatched; + mask = IDX_MAX/maxdegree; + + /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("IN: nunmatched: %zu\t", nunmatched)); */ + + WCOREPUSH; + + /* collapse vertices with identical adjancency lists */ + keys = ikvwspacemalloc(ctrl, nunmatched); + for (ncand=0, pi=0; pi 1 && idegree < maxdegree) { + for (k=0, j=xadj[i]; jCType); } + if (jj == xadj[k+1]) { + cmap[i] = cmap[k] = cnvtxs++; + match[i] = k; + match[k] = i; + nunmatched -= 2; + break; + } + } + } + WCOREPOP; + + /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("OUT: ncand: %zu, nunmatched: %zu\n", ncand, nunmatched)); */ + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr)); + + *r_nunmatched = nunmatched; + return cnvtxs; +} + + +/*************************************************************************/ +/*! This function finds a matching by selecting an adjacent vertex based + on the Jaccard coefficient of the adjaceny lists. 
+ */ +/**************************************************************************/ +idx_t Match_JC(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, pi, ii, iii, j, jj, jjj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx, + last_unmatched, avgdegree, bnum; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt; + idx_t *match, *cmap, *degrees, *perm, *tperm, *vec, *marker; + idx_t mytwgt, xtwgt, ctwgt; + real_t bscore, score; + + WCOREPUSH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr)); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + cmap = graph->cmap; + + maxvwgt = ctrl->maxvwgt; + + match = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs)); + perm = iwspacemalloc(ctrl, nvtxs); + tperm = iwspacemalloc(ctrl, nvtxs); + degrees = iwspacemalloc(ctrl, nvtxs); + + irandArrayPermute(nvtxs, tperm, nvtxs/8, 1); + + avgdegree = 4.0*(xadj[nvtxs]/nvtxs); + for (i=0; i avgdegree ? avgdegree : bnum); + } + BucketSortKeysInc(ctrl, nvtxs, avgdegree, degrees, tperm, perm); + + /* point to the wspace vectors that are not needed any more */ + vec = tperm; + marker = degrees; + iset(nvtxs, -1, vec); + iset(nvtxs, -1, marker); + + for (cnvtxs=0, last_unmatched=0, pi=0; pimaxvwgt requirements */ + if (xadj[i] == xadj[i+1]) { + last_unmatched = gk_max(pi, last_unmatched)+1; + for (; last_unmatched maxvwgt[0]) + continue; + + ctwgt = xtwgt = 0; + for (jj=xadj[ii]; jj 0) + ctwgt += vec[adjncy[jj]] + adjwgt[jj]; + else if (adjncy[jj] == i) { + ctwgt += adjwgt[jj]; + xtwgt -= adjwgt[jj]; + } + } + + score = 1.0*ctwgt/(mytwgt+xtwgt-ctwgt); + if (score > bscore) { + bscore = score; + maxidx = ii; + } + marker[ii] = i; + } +#endif + + for (j=xadj[i]; j maxvwgt[0]) + continue; + + ctwgt = xtwgt = 0; + for (jjj=xadj[iii]; jjj 0) + ctwgt += 2;//vec[adjncy[jjj]] + adjwgt[jjj]; + else if (adjncy[jjj] == i) + ctwgt += 10*adjwgt[jjj]; + } + + score = 1.0*ctwgt/(mytwgt+xtwgt); + 
//printf("%"PRIDX" %"PRIDX" %"PRIDX" %.4f\n", mytwgt, xtwgt, ctwgt, score); + if (score > bscore) { + bscore = score; + maxidx = iii; + } + marker[iii] = i; + } + } + + /* reset vec array */ + for (j=xadj[i]; jdbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr)); + + CreateCoarseGraph(ctrl, graph, cnvtxs, match); - cgraph = cgraph->coarser; - clevel++; + WCOREPOP; - } while (cgraph->nvtxs > ctrl->CoarsenTo && cgraph->nvtxs < COARSEN_FRACTION2*cgraph->finer->nvtxs && cgraph->nedges > cgraph->nvtxs/2); + return cnvtxs; +} + + +/*************************************************************************/ +/*! This function prints various stats for each graph during coarsening + */ +/*************************************************************************/ +void PrintCGraphStats(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i; + + printf("%10"PRIDX" %10"PRIDX" %10"PRIDX" [%"PRIDX"] [", + graph->nvtxs, graph->nedges, isum(graph->nedges, graph->adjwgt, 1), ctrl->CoarsenTo); + + for (i=0; incon; i++) + printf(" %8"PRIDX":%8"PRIDX, ctrl->maxvwgt[i], graph->tvwgt[i]); + printf(" ]\n"); +} + + +/*************************************************************************/ +/*! This function creates the coarser graph. Depending on the size of the + candidate adjancency lists it either uses a hash table or an array + to do duplicate detection. 
+ */ +/*************************************************************************/ +void CreateCoarseGraph(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, + idx_t *match) +{ + idx_t j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, + cnedges, v, u, mask; + idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt; + idx_t *cmap, *htable, *dtable; + idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt; + graph_t *cgraph; + int dovsize, dropedges; + idx_t cv, nkeys, droppedewgt; + idx_t *keys=NULL, *medianewgts=NULL, *noise=NULL; - IFSET(ctrl->dbglvl, DBG_COARSEN, printf("%6d %7d [%d] [%d %d]\n", - cgraph->nvtxs, cgraph->nedges, ctrl->CoarsenTo, ctrl->maxvwgt, - (cgraph->vwgt ? idxsum(cgraph->nvtxs, cgraph->vwgt) : cgraph->nvtxs))); + WCOREPUSH; - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->CoarsenTmr)); + dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0); + dropedges = ctrl->dropedges; + + mask = HTLENGTH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr)); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + vsize = graph->vsize; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + cmap = graph->cmap; + + /* Setup structures for dropedges */ + if (dropedges) { + for (nkeys=0, v=0; vxadj; + cvwgt = cgraph->vwgt; + cvsize = cgraph->vsize; + cadjncy = cgraph->adjncy; + cadjwgt = cgraph->adjwgt; + + htable = iset(mask+1, -1, iwspacemalloc(ctrl, mask+1)); /* hash table */ + dtable = iset(cnvtxs, -1, iwspacemalloc(ctrl, cnvtxs)); /* direct table */ + + cxadj[0] = cnvtxs = cnedges = 0; + for (v=0; v>2)) { /* use mask */ + /* put the ID of the contracted node itself at the start, so that it can be + * removed easily */ + htable[cnvtxs&mask] = 0; + cadjncy[0] = cnvtxs; + nedges = 1; + + istart = xadj[v]; + iend = xadj[v+1]; + for (j=istart; j=0; j--) { + k = cadjncy[j]; + for (kk=k&mask; cadjncy[htable[kk]]!=k; kk=((kk+1)&mask)); + htable[kk] = -1; + } + + /* remove the contracted vertex from the list */ + 
cadjncy[0] = cadjncy[--nedges]; + cadjwgt[0] = cadjwgt[nedges]; + } + else { + nedges = 0; + istart = xadj[v]; + iend = xadj[v+1]; + for (j=istart; j= min(medianewgts[u], medianewgts[v]) */ + if (dropedges) { + ASSERTP(nedges < nkeys, ("%"PRIDX", %"PRIDX"\n", nkeys, nedges)); + medianewgts[cnvtxs] = 8; /* default for island nodes */ + if (nedges > 0) { + for (j=0; j>1))]; + } + } + + cadjncy += nedges; + cadjwgt += nedges; + cnedges += nedges; + cxadj[++cnvtxs] = cnedges; + } + + + /* compact the adjacency structure of the coarser graph to keep only +ve edges */ + if (dropedges) { + droppedewgt = 0; + + cadjncy = cgraph->adjncy; + cadjwgt = cgraph->adjwgt; + + cnedges = 0; + for (u=0; u= 0, ("%"PRIDX" %"PRIDX"\n", u, medianewgts[u])); + ASSERTP(medianewgts[v] >= 0, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", v, medianewgts[v], cnvtxs)); + if ((cadjwgt[j]<<8) + noise[u] + noise[v] >= gk_min(medianewgts[u], medianewgts[v])) { + cadjncy[cnedges] = cadjncy[j]; + cadjwgt[cnedges++] = cadjwgt[j]; + } + else + droppedewgt += cadjwgt[j]; + } + cxadj[u] = cnedges; + } + SHIFTCSR(j, cnvtxs, cxadj); + + cgraph->droppedewgt = droppedewgt; + } + + cgraph->nedges = cnedges; + + for (j=0; jtvwgt[j] = isum(cgraph->nvtxs, cgraph->vwgt+j, ncon); + cgraph->invtvwgt[j] = 1.0/(cgraph->tvwgt[j] > 0 ? cgraph->tvwgt[j] : 1); + } + + ReAdjustMemory(ctrl, graph, cgraph); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr)); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! Setup the various arrays for the coarse graph + */ +/*************************************************************************/ +graph_t *SetupCoarseGraph(graph_t *graph, idx_t cnvtxs, int dovsize) +{ + graph_t *cgraph; + + cgraph = CreateGraph(); + + cgraph->nvtxs = cnvtxs; + cgraph->ncon = graph->ncon; + + cgraph->finer = graph; + graph->coarser = cgraph; + + /* Allocate memory for the coarser graph. 
+ NOTE: The +1 in the adjwgt/adjncy is to allow the optimization of self-loop + detection by adding ahead of time the self-loop. That optimization + requires a +1 adjncy/adjwgt array for the limit case where the + coarser graph is of the same size of the previous graph. */ + cgraph->xadj = imalloc(cnvtxs+1, "SetupCoarseGraph: xadj"); + cgraph->adjncy = imalloc(graph->nedges+1, "SetupCoarseGraph: adjncy"); + cgraph->adjwgt = imalloc(graph->nedges+1, "SetupCoarseGraph: adjwgt"); + cgraph->vwgt = imalloc(cgraph->ncon*cnvtxs, "SetupCoarseGraph: vwgt"); + cgraph->tvwgt = imalloc(cgraph->ncon, "SetupCoarseGraph: tvwgt"); + cgraph->invtvwgt = rmalloc(cgraph->ncon, "SetupCoarseGraph: invtvwgt"); + + if (dovsize) + cgraph->vsize = imalloc(cnvtxs, "SetupCoarseGraph: vsize"); return cgraph; } + +/*************************************************************************/ +/*! This function re-adjusts the amount of memory that was allocated if + it will lead to significant savings + */ +/*************************************************************************/ +void ReAdjustMemory(ctrl_t *ctrl, graph_t *graph, graph_t *cgraph) +{ + if (cgraph->nedges > 10000 && cgraph->nedges < 0.9*graph->nedges) { + cgraph->adjncy = irealloc(cgraph->adjncy, cgraph->nedges, "ReAdjustMemory: adjncy"); + cgraph->adjwgt = irealloc(cgraph->adjwgt, cgraph->nedges, "ReAdjustMemory: adjwgt"); + } +} diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/compress.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/compress.c index 1739feb3..d72472b2 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/compress.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/compress.c @@ -8,25 +8,31 @@ * * Started 9/17/97 * George - * - * $Id: compress.c,v 1.1 2003/07/16 15:55:01 karypis Exp $ */ -#include +#include "metislib.h" + +/*************************************************************************/ +/*! 
This function compresses a graph by merging identical vertices + The compression should lead to at least 10% reduction. + + The compressed graph that is generated has its adjwgts set to 1. -/************************************************************************* -* This function compresses a graph by merging identical vertices -* The compression should lead to at least 10% reduction. -**************************************************************************/ -void CompressGraph(CtrlType *ctrl, GraphType *graph, int nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *cptr, idxtype *cind) + \returns 1 if compression was performed, otherwise it returns 0. + +*/ +/**************************************************************************/ +graph_t *CompressGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *cptr, idx_t *cind) { - int i, ii, iii, j, jj, k, l, cnvtxs, cnedges; - idxtype *cxadj, *cadjncy, *cvwgt, *mark, *map; - KeyValueType *keys; + idx_t i, ii, iii, j, jj, k, l, cnvtxs, cnedges; + idx_t *cxadj, *cadjncy, *cvwgt, *mark, *map; + ikv_t *keys; + graph_t *graph=NULL; - mark = idxsmalloc(nvtxs, -1, "CompressGraph: mark"); - map = idxsmalloc(nvtxs, -1, "CompressGraph: map"); - keys = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "CompressGraph: keys"); + mark = ismalloc(nvtxs, -1, "CompressGraph: mark"); + map = ismalloc(nvtxs, -1, "CompressGraph: map"); + keys = ikvmalloc(nvtxs, "CompressGraph: keys"); /* Compute a key for each adjacency list */ for (i=0; idbglvl, METIS_DBG_INFO, + printf(" Compression: reduction in # of vertices: %"PRIDX".\n", nvtxs-cnvtxs)); - if (cnvtxs >= COMPRESSION_FRACTION*nvtxs) { - graph->nvtxs = nvtxs; - graph->nedges = xadj[nvtxs]; - graph->ncon = 1; - graph->xadj = xadj; - graph->adjncy = adjncy; - graph->gdata = idxmalloc(3*nvtxs+graph->nedges, "CompressGraph: gdata"); - graph->vwgt = graph->gdata; - graph->adjwgtsum = graph->gdata+nvtxs; - graph->cmap = graph->gdata+2*nvtxs; - graph->adjwgt = 
graph->gdata+3*nvtxs; + if (cnvtxs < COMPRESSION_FRACTION*nvtxs) { + /* Sufficient compression is possible, so go ahead and create the + compressed graph */ - idxset(nvtxs, 1, graph->vwgt); - idxset(graph->nedges, 1, graph->adjwgt); - for (i=0; iadjwgtsum[i] = xadj[i+1]-xadj[i]; + graph = CreateGraph(); - graph->label = idxmalloc(nvtxs, "CompressGraph: label"); - for (i=0; ilabel[i] = i; - } - else { /* Ok, form the compressed graph */ cnedges = 0; for (i=0; igdata = idxmalloc(4*cnvtxs+1 + 2*cnedges, "CompressGraph: gdata"); - cxadj = graph->xadj = graph->gdata; - cvwgt = graph->vwgt = graph->gdata + cnvtxs+1; - graph->adjwgtsum = graph->gdata + 2*cnvtxs+1; - graph->cmap = graph->gdata + 3*cnvtxs+1; - cadjncy = graph->adjncy = graph->gdata + 4*cnvtxs+1; - graph->adjwgt = graph->gdata + 4*cnvtxs+1 + cnedges; + /* Allocate memory for the compressed graph */ + cxadj = graph->xadj = imalloc(cnvtxs+1, "CompressGraph: xadj"); + cvwgt = graph->vwgt = ismalloc(cnvtxs, 0, "CompressGraph: vwgt"); + cadjncy = graph->adjncy = imalloc(cnedges, "CompressGraph: adjncy"); + graph->adjwgt = ismalloc(cnedges, 1, "CompressGraph: adjwgt"); /* Now go and compress the graph */ - idxset(nvtxs, -1, mark); + iset(nvtxs, -1, mark); l = cxadj[0] = 0; for (i=0; invtxs = cnvtxs; + graph->nvtxs = cnvtxs; graph->nedges = l; - graph->ncon = 1; + graph->ncon = 1; - idxset(graph->nedges, 1, graph->adjwgt); - for (i=0; iadjwgtsum[i] = cxadj[i+1]-cxadj[i]; + SetupGraph_tvwgt(graph); + SetupGraph_label(graph); + } - graph->label = idxmalloc(cnvtxs, "CompressGraph: label"); - for (i=0; ilabel[i] = i; + gk_free((void **)&keys, &map, &mark, LTERM); - } + return graph; - GKfree((void **)(&keys), &map, &mark, LTERM); } -/************************************************************************* -* This function prunes all the vertices in a graph with degree greater -* than factor*average -**************************************************************************/ -void PruneGraph(CtrlType *ctrl, GraphType 
*graph, int nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *iperm, float factor) +/*************************************************************************/ +/*! This function prunes all the vertices in a graph with degree greater + than factor*average. + + \returns the number of vertices that were prunned. +*/ +/*************************************************************************/ +graph_t *PruneGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *iperm, real_t factor) { - int i, j, k, l, nlarge, pnvtxs, pnedges; - idxtype *pxadj, *padjncy, *padjwgt, *pvwgt; - idxtype *perm; + idx_t i, j, k, l, nlarge, pnvtxs, pnedges; + idx_t *pxadj, *padjncy, *padjwgt, *pvwgt; + idx_t *perm; + graph_t *graph=NULL; - perm = idxmalloc(nvtxs, "PruneGraph: perm"); + perm = imalloc(nvtxs, "PruneGraph: perm"); factor = factor*xadj[nvtxs]/nvtxs; @@ -180,45 +172,25 @@ void PruneGraph(CtrlType *ctrl, GraphType *graph, int nvtxs, idxtype *xadj, idxt } } - /* printf("Pruned %d vertices\n", nlarge); */ - - InitGraph(graph); + IFSET(ctrl->dbglvl, METIS_DBG_INFO, + printf(" Pruned %"PRIDX" of %"PRIDX" vertices.\n", nlarge, nvtxs)); - if (nlarge == 0) { /* No prunning */ - graph->nvtxs = nvtxs; - graph->nedges = xadj[nvtxs]; - graph->ncon = 1; - graph->xadj = xadj; - graph->adjncy = adjncy; - graph->gdata = idxmalloc(3*nvtxs+graph->nedges, "CompressGraph: gdata"); - graph->vwgt = graph->gdata; - graph->adjwgtsum = graph->gdata+nvtxs; - graph->cmap = graph->gdata+2*nvtxs; - graph->adjwgt = graph->gdata+3*nvtxs; + if (nlarge > 0 && nlarge < nvtxs) { + /* Prunning is possible, so go ahead and create the prunned graph */ + graph = CreateGraph(); - idxset(nvtxs, 1, graph->vwgt); - idxset(graph->nedges, 1, graph->adjwgt); - for (i=0; iadjwgtsum[i] = xadj[i+1]-xadj[i]; - - graph->label = idxmalloc(nvtxs, "CompressGraph: label"); - for (i=0; ilabel[i] = i; - } - else { /* Prune the graph */ - /* Allocate memory for the compressed graph*/ - graph->gdata = 
idxmalloc(4*pnvtxs+1 + 2*pnedges, "PruneGraph: gdata"); - pxadj = graph->xadj = graph->gdata; - graph->vwgt = graph->gdata + pnvtxs+1; - graph->adjwgtsum = graph->gdata + 2*pnvtxs+1; - graph->cmap = graph->gdata + 3*pnvtxs+1; - padjncy = graph->adjncy = graph->gdata + 4*pnvtxs+1; - graph->adjwgt = graph->gdata + 4*pnvtxs+1 + pnedges; + /* Allocate memory for the prunned graph*/ + pxadj = graph->xadj = imalloc(pnvtxs+1, "PruneGraph: xadj"); + pvwgt = graph->vwgt = imalloc(pnvtxs, "PruneGraph: vwgt"); + padjncy = graph->adjncy = imalloc(pnedges, "PruneGraph: adjncy"); + graph->adjwgt = ismalloc(pnedges, 1, "PruneGraph: adjwgt"); pxadj[0] = pnedges = l = 0; for (i=0; invtxs = pnvtxs; + graph->nvtxs = pnvtxs; graph->nedges = pnedges; - graph->ncon = 1; + graph->ncon = 1; - idxset(pnvtxs, 1, graph->vwgt); - idxset(pnedges, 1, graph->adjwgt); - for (i=0; iadjwgtsum[i] = pxadj[i+1]-pxadj[i]; - - graph->label = idxmalloc(pnvtxs, "CompressGraph: label"); - for (i=0; ilabel[i] = i; + SetupGraph_tvwgt(graph); + SetupGraph_label(graph); + } + else if (nlarge > 0 && nlarge == nvtxs) { + IFSET(ctrl->dbglvl, METIS_DBG_INFO, + printf(" Pruning is ignored as it removes all vertices.\n")); + nlarge = 0; } - GKfree((void **)&perm, LTERM); + + + gk_free((void **)&perm, LTERM); + + return graph; } diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/contig.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/contig.c new file mode 100644 index 00000000..540c4f9b --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/contig.c @@ -0,0 +1,699 @@ +/*! +\file +\brief Functions that deal with eliminating disconnected partitions + +\date Started 7/15/98 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version $Id: contig.c 10513 2011-07-07 22:06:03Z karypis $ +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! 
This function finds the connected components induced by the + partitioning vector. + + \param graph is the graph structure + \param where is the partitioning vector. If this is NULL, then the + entire graph is treated to belong into a single partition. + \param cptr is the ptr structure of the CSR representation of the + components. The length of this vector must be graph->nvtxs+1. + \param cind is the indices structure of the CSR representation of + the components. The length of this vector must be graph->nvtxs. + + \returns the number of components that it found. + + \note The cptr and cind parameters can be NULL, in which case only the + number of connected components is returned. +*/ +/*************************************************************************/ +idx_t FindPartitionInducedComponents(graph_t *graph, idx_t *where, + idx_t *cptr, idx_t *cind) +{ + idx_t i, ii, j, jj, k, me=0, nvtxs, first, last, nleft, ncmps; + idx_t *xadj, *adjncy; + idx_t *touched, *perm, *todo; + idx_t mustfree_ccsr=0, mustfree_where=0; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* Deal with NULL supplied cptr/cind vectors */ + if (cptr == NULL) { + cptr = imalloc(nvtxs+1, "FindPartitionInducedComponents: cptr"); + cind = imalloc(nvtxs, "FindPartitionInducedComponents: cind"); + mustfree_ccsr = 1; + } + + /* Deal with NULL supplied where vector */ + if (where == NULL) { + where = ismalloc(nvtxs, 0, "FindPartitionInducedComponents: where"); + mustfree_where = 1; + } + + /* Allocate memory required for the BFS traversal */ + perm = iincset(nvtxs, 0, imalloc(nvtxs, "FindPartitionInducedComponents: perm")); + todo = iincset(nvtxs, 0, imalloc(nvtxs, "FindPartitionInducedComponents: todo")); + touched = ismalloc(nvtxs, 0, "FindPartitionInducedComponents: touched"); + + + /* Find the connected componends induced by the partition */ + ncmps = -1; + first = last = 0; + nleft = nvtxs; + while (nleft > 0) { + if (first == last) { /* Find another starting 
vertex */ + cptr[++ncmps] = first; + ASSERT(touched[todo[0]] == 0); + i = todo[0]; + cind[last++] = i; + touched[i] = 1; + me = where[i]; + } + + i = cind[first++]; + k = perm[i]; + j = todo[k] = todo[--nleft]; + perm[j] = k; + + for (j=xadj[i]; jnvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* Allocate memory required for the BFS traversal */ + perm = iincset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + + iincset(nvtxs, 0, bfsperm); /* this array will also store the vertices + still to be processed */ + + /* Find the connected componends induced by the partition */ + first = last = 0; + while (first < nvtxs) { + if (first == last) { /* Find another starting vertex */ + k = bfsperm[last]; + ASSERT(perm[k] != -1); + perm[k] = -1; /* mark node as being visited */ + last++; + } + + i = bfsperm[first++]; + for (j=xadj[i]; jnvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + where = graph->where; + + touched = ismalloc(nvtxs, 0, "IsConnected: touched"); + queue = imalloc(nvtxs, "IsConnected: queue"); + cptr = imalloc(nvtxs+1, "IsConnected: cptr"); + + nleft = 0; + for (i=0; i 1 && report) { + printf("The graph has %"PRIDX" connected components in partition %"PRIDX":\t", ncmps, pid); + for (i=0; ivwgt[queue[j]]; + printf("[%5"PRIDX" %5"PRIDX"] ", cptr[i+1]-cptr[i], wgt); + /* + if (cptr[i+1]-cptr[i] == 1) + printf("[%"PRIDX" %"PRIDX"] ", queue[cptr[i]], xadj[queue[cptr[i]]+1]-xadj[queue[cptr[i]]]); + */ + } + printf("\n"); + } + + gk_free((void **)&touched, &queue, &cptr, LTERM); + + return (ncmps == 1 ? 1 : 0); +} + + +/*************************************************************************/ +/*! This function identifies the number of connected components in a graph + that result after removing the vertices that belong to the vertex + separator (i.e., graph->where[i] == 2). + The connected component memberships are returned in the CSR-style + pair of arrays cptr, cind. 
+*/ +/**************************************************************************/ +idx_t FindSepInducedComponents(ctrl_t *ctrl, graph_t *graph, idx_t *cptr, + idx_t *cind) +{ + idx_t i, j, k, nvtxs, first, last, nleft, ncmps, wgt; + idx_t *xadj, *adjncy, *where, *touched, *queue; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + where = graph->where; + + touched = ismalloc(nvtxs, 0, "IsConnected: queue"); + + for (i=0; inbnd; i++) + touched[graph->bndind[i]] = 1; + + queue = cind; + + nleft = 0; + for (i=0; iwhere and tries to push them around to + remove some of them. */ +/*************************************************************************/ +void EliminateComponents(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, jj, k, me, nparts, nvtxs, ncon, ncmps, other, + ncand, target; + idx_t *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts; + idx_t *cptr, *cind, *cpvec, *pcptr, *pcind, *cwhere; + idx_t cid, bestcid, *cwgt, *bestcwgt; + idx_t ntodo, oldntodo, *todo; + rkv_t *cand; + real_t *tpwgts; + idx_t *vmarker=NULL, *pmarker=NULL, *modind=NULL; /* volume specific work arrays */ + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + adjncy = graph->adjncy; + vwgt = graph->vwgt; + adjwgt = (ctrl->objtype == METIS_OBJTYPE_VOL ? 
NULL : graph->adjwgt); + + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + cptr = iwspacemalloc(ctrl, nvtxs+1); + cind = iwspacemalloc(ctrl, nvtxs); + + ncmps = FindPartitionInducedComponents(graph, where, cptr, cind); + + IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, + printf("I found %"PRIDX" components, for this %"PRIDX"-way partition\n", + ncmps, nparts)); + + /* There are more components than partitions */ + if (ncmps > nparts) { + cwgt = iwspacemalloc(ctrl, ncon); + bestcwgt = iwspacemalloc(ctrl, ncon); + cpvec = iwspacemalloc(ctrl, nparts); + pcptr = iset(nparts+1, 0, iwspacemalloc(ctrl, nparts+1)); + pcind = iwspacemalloc(ctrl, ncmps); + cwhere = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + todo = iwspacemalloc(ctrl, ncmps); + cand = (rkv_t *)wspacemalloc(ctrl, nparts*sizeof(rkv_t)); + + if (ctrl->objtype == METIS_OBJTYPE_VOL) { + /* Vol-refinement specific working arrays */ + modind = iwspacemalloc(ctrl, nvtxs); + vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + } + + + /* Get a CSR representation of the components-2-partitions mapping */ + for (i=0; i 0) { + oldntodo = ntodo; + for (i=0; idbglvl, METIS_DBG_CONTIGINFO, + printf("Trying to move %"PRIDX" [%"PRIDX"] from %"PRIDX"\n", + cid, isum(ncon, cwgt, 1), me)); + + /* Determine the connectivity */ + iset(nparts, 0, cpvec); + for (j=cptr[cid]; j 0) { + cand[ncand].key = cpvec[j]; + cand[ncand++].val = j; + } + } + if (ncand == 0) + continue; + + rkvsortd(ncand, cand); + + /* Limit the moves to only the top candidates, which are defined as + those with connectivity at least 50% of the best. + This applies only when ncon=1, as for multi-constraint, balancing + will be hard. 
*/ + if (ncon == 1) { + for (j=1; jubfactors, + 1, pwgts+target*ncon, ctrl->pijbm+target*ncon, + 1, pwgts+cand[j].val*ncon, ctrl->pijbm+cand[j].val*ncon)) + target = cand[j].val; + } + + IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, + printf("\tMoving it to %"PRIDX" [%"PRIDX"] [%"PRIDX"]\n", target, cpvec[target], ncand)); + + /* Note that as a result of a previous movement, a connected component may + now will like to stay to its original partition */ + if (target != me) { + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + MoveGroupContigForCut(ctrl, graph, target, cid, cptr, cind); + break; + + case METIS_OBJTYPE_VOL: + MoveGroupContigForVol(ctrl, graph, target, cid, cptr, cind, + vmarker, pmarker, modind); + break; + + default: + gk_errexit(SIGERR, "Unknown objtype %d\n", ctrl->objtype); + } + } + + /* Update the cwhere vector */ + for (j=cptr[cid]; jdbglvl, METIS_DBG_CONTIGINFO, printf("Stopped at ntodo: %"PRIDX"\n", ntodo)); + break; + } + } + + for (i=0; invtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; + + nbnd = graph->nbnd; + + for (iii=ptr[gid]; iiickrinfo+i; + if (myrinfo->inbr == -1) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + /* find the location of 'to' in myrinfo or create it if it is not there */ + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) + break; + } + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = to; + mynbrs[k].ed = 0; + myrinfo->nnbrs++; + } + + graph->mincut -= mynbrs[k].ed-myrinfo->id; + + /* Update ID/ED and BND related information for the moved vertex */ + iaxpy(graph->ncon, 1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon, 1); + iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1); + UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, nbnd, + bndptr, bndind, 
BNDTYPE_REFINE); + + /* Update the degrees of adjacent vertices */ + for (j=xadj[i]; jckrinfo+ii; + + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, BNDTYPE_REFINE); + } + + ASSERT(CheckRInfo(ctrl, graph->ckrinfo+i)); + } + + graph->nbnd = nbnd; +} + + +/*************************************************************************/ +/*! This function moves a collection of vertices and updates their rinfo + */ +/*************************************************************************/ +void MoveGroupContigForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid, + idx_t *ptr, idx_t *ind, idx_t *vmarker, idx_t *pmarker, + idx_t *modind) +{ + idx_t i, ii, iii, j, jj, k, l, nvtxs, from, me, other, xgain; + idx_t *xadj, *vsize, *adjncy, *where; + vkrinfo_t *myrinfo, *orinfo; + vnbr_t *mynbrs, *onbrs; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vsize = graph->vsize; + adjncy = graph->adjncy; + where = graph->where; + + for (iii=ptr[gid]; iiivkrinfo+i; + if (myrinfo->inbr == -1) { + myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? vsize[i] : 0); + + /* find the location of 'to' in myrinfo or create it if it is not there */ + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) + break; + } + if (k == myrinfo->nnbrs) { + if (myrinfo->nid > 0) + xgain -= vsize[i]; + + /* determine the volume gain resulting from that move */ + for (j=xadj[i]; jvkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + ASSERT(other != to) + + if (from == other) { + /* Same subdomain vertex: Decrease the gain if 'to' is a new neighbor. 
*/ + for (l=0; lnnbrs; l++) { + if (onbrs[l].pid == to) + break; + } + if (l == orinfo->nnbrs) + xgain -= vsize[ii]; + } + else { + /* Remote vertex: increase if 'to' is a new subdomain */ + for (l=0; lnnbrs; l++) { + if (onbrs[l].pid == to) + break; + } + if (l == orinfo->nnbrs) + xgain -= vsize[ii]; + + /* Remote vertex: decrease if i is the only connection to 'from' */ + for (l=0; lnnbrs; l++) { + if (onbrs[l].pid == from && onbrs[l].ned == 1) { + xgain += vsize[ii]; + break; + } + } + } + } + graph->minvol -= xgain; + graph->mincut -= -myrinfo->nid; + } + else { + graph->minvol -= (xgain + mynbrs[k].gv); + graph->mincut -= mynbrs[k].ned-myrinfo->nid; + } + + + /* Update where and pwgts */ + where[i] = to; + iaxpy(graph->ncon, 1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon, 1); + iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1); + + /* Update the id/ed/gains/bnd of potentially affected nodes */ + KWayVolUpdate(ctrl, graph, i, from, to, NULL, NULL, NULL, NULL, + NULL, BNDTYPE_REFINE, vmarker, pmarker, modind); + + /*CheckKWayVolPartitionParams(ctrl, graph);*/ + } + + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERTP(ComputeVolume(graph, where) == graph->minvol, + ("%"PRIDX" %"PRIDX"\n", ComputeVolume(graph, where), graph->minvol)); + +} + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/debug.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/debug.c index b71fe2f4..f09be2fd 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/debug.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/debug.c @@ -3,23 +3,24 @@ * * debug.c * - * This file contains code that performs self debuging + * This file contains code that performs self debugging * * Started 7/24/97 * George * - * $Id: debug.c,v 1.1 2003/07/16 15:55:01 karypis Exp $ - * */ -#include +#include "metislib.h" + + 
-/************************************************************************* -* This function computes the cut given the graph and a where vector -**************************************************************************/ -int ComputeCut(GraphType *graph, idxtype *where) +/*************************************************************************/ +/*! This function computes the total edgecut + */ +/*************************************************************************/ +idx_t ComputeCut(graph_t *graph, idx_t *where) { - int i, j, cut; + idx_t i, j, cut; if (graph->adjwgt == NULL) { for (cut=0, i=0; invtxs; i++) { @@ -40,13 +41,87 @@ int ComputeCut(GraphType *graph, idxtype *where) } -/************************************************************************* -* This function checks whether or not the boundary information is correct -**************************************************************************/ -int CheckBnd(GraphType *graph) +/*************************************************************************/ +/*! 
This function computes the total volume + */ +/*************************************************************************/ +idx_t ComputeVolume(graph_t *graph, idx_t *where) +{ + idx_t i, j, k, me, nvtxs, nparts, totalv; + idx_t *xadj, *adjncy, *vsize, *marker; + + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + vsize = graph->vsize; + + nparts = where[iargmax(nvtxs, where,1)]+1; + marker = ismalloc(nparts, -1, "ComputeVolume: marker"); + + totalv = 0; + + for (i=0; iadjwgt == NULL) { + for (i=0; invtxs; i++) { + for (j=graph->xadj[i]; jxadj[i+1]; j++) + if (where[i] != where[graph->adjncy[j]]) + cuts[where[i]]++; + } + } + else { + for (i=0; invtxs; i++) { + for (j=graph->xadj[i]; jxadj[i+1]; j++) + if (where[i] != where[graph->adjncy[j]]) + cuts[where[i]] += graph->adjwgt[j]; + } + } + + maxcut = cuts[iargmax(nparts, cuts,1)]; + + printf("%zu => %"PRIDX"\n", iargmax(nparts, cuts,1), maxcut); + + gk_free((void **)&cuts, LTERM); + + return maxcut; +} + + +/*************************************************************************/ +/*! 
This function checks whether or not the boundary information is correct + */ +/*************************************************************************/ +idx_t CheckBnd(graph_t *graph) { - int i, j, nvtxs, nbnd; - idxtype *xadj, *adjncy, *where, *bndptr, *bndind; + idx_t i, j, nvtxs, nbnd; + idx_t *xadj, *adjncy, *where, *bndptr, *bndind; nvtxs = graph->nvtxs; xadj = graph->xadj; @@ -69,25 +144,26 @@ int CheckBnd(GraphType *graph) } } - ASSERTP(nbnd == graph->nbnd, ("%d %d\n", nbnd, graph->nbnd)); + ASSERTP(nbnd == graph->nbnd, ("%"PRIDX" %"PRIDX"\n", nbnd, graph->nbnd)); return 1; } -/************************************************************************* -* This function checks whether or not the boundary information is correct -**************************************************************************/ -int CheckBnd2(GraphType *graph) +/*************************************************************************/ +/*! This function checks whether or not the boundary information is correct + */ +/*************************************************************************/ +idx_t CheckBnd2(graph_t *graph) { - int i, j, nvtxs, nbnd, id, ed; - idxtype *xadj, *adjncy, *where, *bndptr, *bndind; + idx_t i, j, nvtxs, nbnd, id, ed; + idx_t *xadj, *adjncy, *where, *bndptr, *bndind; - nvtxs = graph->nvtxs; - xadj = graph->xadj; + nvtxs = graph->nvtxs; + xadj = graph->xadj; adjncy = graph->adjncy; - where = graph->where; + where = graph->where; bndptr = graph->bndptr; bndind = graph->bndind; @@ -101,23 +177,25 @@ int CheckBnd2(GraphType *graph) } if (ed - id >= 0 && xadj[i] < xadj[i+1]) { nbnd++; - ASSERTP(bndptr[i] != -1, ("%d %d %d\n", i, id, ed)); + ASSERTP(bndptr[i] != -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", i, id, ed)); ASSERT(bndind[bndptr[i]] == i); } } - ASSERTP(nbnd == graph->nbnd, ("%d %d\n", nbnd, graph->nbnd)); + ASSERTP(nbnd == graph->nbnd, ("%"PRIDX" %"PRIDX"\n", nbnd, graph->nbnd)); return 1; } 
-/************************************************************************* -* This function checks whether or not the boundary information is correct -**************************************************************************/ -int CheckNodeBnd(GraphType *graph, int onbnd) + +/*************************************************************************/ +/*! This function checks whether or not the boundary information is correct + */ +/*************************************************************************/ +idx_t CheckNodeBnd(graph_t *graph, idx_t onbnd) { - int i, j, nvtxs, nbnd; - idxtype *xadj, *adjncy, *where, *bndptr, *bndind; + idx_t i, j, nvtxs, nbnd; + idx_t *xadj, *adjncy, *where, *bndptr, *bndind; nvtxs = graph->nvtxs; xadj = graph->xadj; @@ -131,14 +209,14 @@ int CheckNodeBnd(GraphType *graph, int onbnd) nbnd++; } - ASSERTP(nbnd == onbnd, ("%d %d\n", nbnd, onbnd)); + ASSERTP(nbnd == onbnd, ("%"PRIDX" %"PRIDX"\n", nbnd, onbnd)); for (i=0; indegrees; i++) { - for (j=i+1; jndegrees; j++) - ASSERTP(rinfo->edegrees[i].pid != rinfo->edegrees[j].pid, ("%d %d %d %d\n", i, j, rinfo->edegrees[i].pid, rinfo->edegrees[j].pid)); + ASSERT(ctrl->nbrpoolcpos >= 0); + ASSERT(rinfo->nnbrs < ctrl->nparts); + + nbrs = ctrl->cnbrpool + rinfo->inbr; + + for (i=0; innbrs; i++) { + for (j=i+1; jnnbrs; j++) + ASSERTP(nbrs[i].pid != nbrs[j].pid, + ("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", + i, j, nbrs[i].pid, nbrs[j].pid)); } return 1; @@ -164,22 +251,22 @@ int CheckRInfo(RInfoType *rinfo) -/************************************************************************* -* This function checks the correctness of the NodeFM data structures -**************************************************************************/ -int CheckNodePartitionParams(GraphType *graph) +/*************************************************************************/ +/*! 
This function checks the correctness of the NodeFM data structures + */ +/*************************************************************************/ +idx_t CheckNodePartitionParams(graph_t *graph) { - int i, j, k, l, nvtxs, me, other; - idxtype *xadj, *adjncy, *adjwgt, *vwgt, *where; - idxtype edegrees[2], pwgts[3]; + idx_t i, j, k, l, nvtxs, me, other; + idx_t *xadj, *adjncy, *adjwgt, *vwgt, *where; + idx_t edegrees[2], pwgts[3]; - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - - where = graph->where; + where = graph->where; /*------------------------------------------------------------ / Compute now the separator external degrees @@ -197,27 +284,35 @@ int CheckNodePartitionParams(GraphType *graph) if (other != 2) edegrees[other] += vwgt[adjncy[j]]; } - if (edegrees[0] != graph->nrinfo[i].edegrees[0] || edegrees[1] != graph->nrinfo[i].edegrees[1]) { - printf("Something wrong with edegrees: %d %d %d %d %d\n", i, edegrees[0], edegrees[1], graph->nrinfo[i].edegrees[0], graph->nrinfo[i].edegrees[1]); + if (edegrees[0] != graph->nrinfo[i].edegrees[0] || + edegrees[1] != graph->nrinfo[i].edegrees[1]) { + printf("Something wrong with edegrees: %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", + i, edegrees[0], edegrees[1], + graph->nrinfo[i].edegrees[0], graph->nrinfo[i].edegrees[1]); return 0; } } } - if (pwgts[0] != graph->pwgts[0] || pwgts[1] != graph->pwgts[1] || pwgts[2] != graph->pwgts[2]) - printf("Something wrong with part-weights: %d %d %d %d %d %d\n", pwgts[0], pwgts[1], pwgts[2], graph->pwgts[0], graph->pwgts[1], graph->pwgts[2]); + if (pwgts[0] != graph->pwgts[0] || + pwgts[1] != graph->pwgts[1] || + pwgts[2] != graph->pwgts[2]) { + printf("Something wrong with part-weights: %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", pwgts[0], pwgts[1], pwgts[2], graph->pwgts[0], graph->pwgts[1], graph->pwgts[2]); + return 0; + 
} return 1; } -/************************************************************************* -* This function checks if the separator is indeed a separator -**************************************************************************/ -int IsSeparable(GraphType *graph) +/*************************************************************************/ +/*! This function checks if the separator is indeed a separator + */ +/*************************************************************************/ +idx_t IsSeparable(graph_t *graph) { - int i, j, nvtxs, other; - idxtype *xadj, *adjncy, *where; + idx_t i, j, nvtxs, other; + idx_t *xadj, *adjncy, *where; nvtxs = graph->nvtxs; xadj = graph->xadj; @@ -229,7 +324,10 @@ int IsSeparable(GraphType *graph) continue; other = (where[i]+1)%2; for (j=xadj[i]; jvrinfo structure */ +/*************************************************************************/ +void CheckKWayVolPartitionParams(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, k, kk, l, nvtxs, nbnd, mincut, minvol, me, other, pid; + idx_t *xadj, *vsize, *adjncy, *pwgts, *where, *bndind, *bndptr; + vkrinfo_t *rinfo, *myrinfo, *orinfo, tmprinfo; + vnbr_t *mynbrs, *onbrs, *tmpnbrs; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vsize = graph->vsize; + adjncy = graph->adjncy; + where = graph->where; + rinfo = graph->vkrinfo; + + tmpnbrs = (vnbr_t *)wspacemalloc(ctrl, ctrl->nparts*sizeof(vnbr_t)); + + /*------------------------------------------------------------ + / Compute now the iv/ev degrees + /------------------------------------------------------------*/ + for (i=0; ivnbrpool + myrinfo->inbr; + + for (k=0; knnbrs; k++) + tmpnbrs[k] = mynbrs[k]; + + tmprinfo.nnbrs = myrinfo->nnbrs; + tmprinfo.nid = myrinfo->nid; + tmprinfo.ned = myrinfo->ned; + + myrinfo = &tmprinfo; + mynbrs = tmpnbrs; + + for (k=0; knnbrs; k++) + mynbrs[k].gv = 0; + + for (j=xadj[i]; jvnbrpool + orinfo->inbr; + + if (me == other) { + /* Find which domains 'i' is connected and 'ii' is not and 
update their gain */ + for (k=0; knnbrs; k++) { + pid = mynbrs[k].pid; + for (kk=0; kknnbrs; kk++) { + if (onbrs[kk].pid == pid) + break; + } + if (kk == orinfo->nnbrs) + mynbrs[k].gv -= vsize[ii]; + } + } + else { + /* Find the orinfo[me].ed and see if I'm the only connection */ + for (k=0; knnbrs; k++) { + if (onbrs[k].pid == me) + break; + } + + if (onbrs[k].ned == 1) { /* I'm the only connection of 'ii' in 'me' */ + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == other) { + mynbrs[k].gv += vsize[ii]; + break; + } + } + + /* Increase the gains for all the common domains between 'i' and 'ii' */ + for (k=0; knnbrs; k++) { + if ((pid = mynbrs[k].pid) == other) + continue; + for (kk=0; kknnbrs; kk++) { + if (onbrs[kk].pid == pid) { + mynbrs[k].gv += vsize[ii]; + break; + } + } + } + + } + else { + /* Find which domains 'i' is connected and 'ii' is not and update their gain */ + for (k=0; knnbrs; k++) { + if ((pid = mynbrs[k].pid) == other) + continue; + for (kk=0; kknnbrs; kk++) { + if (onbrs[kk].pid == pid) + break; + } + if (kk == orinfo->nnbrs) + mynbrs[k].gv -= vsize[ii]; + } + } + } + } + + myrinfo = rinfo+i; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + for (k=0; knnbrs; k++) { + pid = mynbrs[k].pid; + for (kk=0; kk - -/************************************************************************* -* This function computes how much memory will be required by the various -* routines in METIS -**************************************************************************/ -void METIS_EstimateMemory(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *optype, int *nbytes) -{ - int i, j, k, nedges, nlevels; - float vfraction, efraction, vmult, emult; - int coresize, gdata, rdata; - - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - - nedges = xadj[*nvtxs]; - - InitRandom(-1); - EstimateCFraction(*nvtxs, xadj, adjncy, &vfraction, &efraction); - - /* Estimate the amount of memory for coresize */ - if (*optype == 2) - coresize = nedges; - else - 
coresize = 0; - coresize += nedges + 11*(*nvtxs) + 4*1024 + 2*(NEG_GAINSPAN+PLUS_GAINSPAN+1)*(sizeof(ListNodeType *)/sizeof(idxtype)); - coresize += 2*(*nvtxs); /* add some more fore other vectors */ - - gdata = nedges; /* Assume that the user does not pass weights */ - - nlevels = (int)(log(100.0/(*nvtxs))/log(vfraction) + .5); - vmult = 0.5 + (1.0 - pow(vfraction, nlevels))/(1.0 - vfraction); - emult = 1.0 + (1.0 - pow(efraction, nlevels+1))/(1.0 - efraction); - - gdata += vmult*4*(*nvtxs) + emult*2*nedges; - if ((vmult-1.0)*4*(*nvtxs) + (emult-1.0)*2*nedges < 5*(*nvtxs)) - rdata = 0; - else - rdata = 5*(*nvtxs); - - *nbytes = sizeof(idxtype)*(coresize+gdata+rdata+(*nvtxs)); - - if (*numflag == 1) - Change2FNumbering2(*nvtxs, xadj, adjncy); -} - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void EstimateCFraction(int nvtxs, idxtype *xadj, idxtype *adjncy, float *vfraction, float *efraction) -{ - int i, ii, j, cnvtxs, cnedges, maxidx; - idxtype *match, *cmap, *perm; - - cmap = idxmalloc(nvtxs, "cmap"); - match = idxsmalloc(nvtxs, UNMATCHED, "match"); - perm = idxmalloc(nvtxs, "perm"); - RandomPermute(nvtxs, perm, 1); - - cnvtxs = 0; - for (ii=0; ii +/*! 
+\file +\brief Functions for the edge-based FM refinement + +\date Started 7/23/97 +\author George +\author Copyright 1997-2011, Regents of the University of Minnesota +\version\verbatim $Id: fm.c 10187 2011-06-13 13:46:57Z karypis $ \endverbatim +*/ + +#include "metislib.h" /************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ -void FM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, int *tpwgts, int npasses) +void FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter) { - int i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, limit, tmp; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; - idxtype *moved, *swaps, *perm; - PQueueType parts[2]; - int higain, oldgain, mincut, mindiff, origdiff, initcut, newcut, mincutorder, avgvwgt; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; + if (graph->ncon == 1) + FM_2WayCutRefine(ctrl, graph, ntpwgts, niter); + else + FM_Mc2WayCutRefine(ctrl, graph, ntpwgts, niter); +} + + +/*************************************************************************/ +/*! 
This function performs a cut-focused FM refinement */ +/*************************************************************************/ +void FM_2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter) +{ + idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, limit, tmp; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; + idx_t *moved, *swaps, *perm; + rpq_t *queues[2]; + idx_t higain, mincut, mindiff, origdiff, initcut, newcut, mincutorder, avgvwgt; + idx_t tpwgts[2]; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - id = graph->id; - ed = graph->ed; - pwgts = graph->pwgts; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; bndptr = graph->bndptr; bndind = graph->bndind; - moved = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); - limit = amin(amax(0.01*nvtxs, 15), 100); - avgvwgt = amin((pwgts[0]+pwgts[1])/20, 2*(pwgts[0]+pwgts[1])/nvtxs); + tpwgts[0] = graph->tvwgt[0]*ntpwgts[0]; + tpwgts[1] = graph->tvwgt[0]-tpwgts[0]; + + limit = gk_min(gk_max(0.01*nvtxs, 15), 100); + avgvwgt = gk_min((pwgts[0]+pwgts[1])/20, 2*(pwgts[0]+pwgts[1])/nvtxs); - tmp = graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]; - PQueueInit(ctrl, &parts[0], nvtxs, tmp); - PQueueInit(ctrl, &parts[1], nvtxs, tmp); + queues[0] = rpqCreate(nvtxs); + queues[1] = rpqCreate(nvtxs); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d] T[%6d %6d], Nv-Nb[%6d %6d]. 
ICut: %6d\n", - pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + Print2WayRefineStats(ctrl, graph, ntpwgts, 0, -2)); - origdiff = abs(tpwgts[0]-pwgts[0]); - idxset(nvtxs, -1, moved); - for (pass=0; passmincut; - mindiff = abs(tpwgts[0]-pwgts[0]); + mindiff = iabs(tpwgts[0]-pwgts[0]); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert boundary nodes in the priority queues */ nbnd = graph->nbnd; - RandomPermute(nbnd, perm, 1); + irandArrayPermute(nbnd, perm, nbnd, 1); for (ii=0; ii 0 || id[bndind[i]] == 0); ASSERT(bndptr[bndind[i]] != -1); - PQueueInsert(&parts[where[bndind[i]]], bndind[i], ed[bndind[i]]-id[bndind[i]]); + rpqInsert(queues[where[bndind[i]]], bndind[i], ed[bndind[i]]-id[bndind[i]]); } for (nswaps=0; nswaps limit) { /* We hit the limit, undo last move */ @@ -102,8 +115,8 @@ void FM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, int *tpwgts, int npasse moved[higain] = nswaps; swaps[nswaps] = higain; - IFSET(ctrl->dbglvl, DBG_MOVEINFO, - printf("Moved %6d from %d. [%3d %3d] %5d [%4d %4d]\n", higain, from, ed[higain]-id[higain], vwgt[higain], newcut, pwgts[0], pwgts[1])); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" from %"PRIDX". 
[%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], newcut, pwgts[0], pwgts[1])); /************************************************************** * Update the id[i]/ed[i] values of the affected nodes @@ -114,7 +127,6 @@ void FM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, int *tpwgts, int npasse for (j=xadj[higain]; j 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) - PQueueInsert(&parts[where[k]], k, ed[k]-id[k]); + rpqInsert(queues[where[k]], k, ed[k]-id[k]); } } } @@ -172,23 +184,360 @@ void FM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, int *tpwgts, int npasse } } - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\tMinimum cut: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); + graph->mincut = mincut; + graph->nbnd = nbnd; + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + Print2WayRefineStats(ctrl, graph, ntpwgts, 0, mincutorder)); + + if (mincutorder <= 0 || mincut == initcut) + break; + } + + rpqDestroy(queues[0]); + rpqDestroy(queues[1]); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! 
This function performs a cut-focused multi-constraint FM refinement */ +/*************************************************************************/ +void FM_Mc2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter) +{ + idx_t i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, + me, limit, tmp, cnum; + idx_t *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *id, *ed, + *bndptr, *bndind; + idx_t *moved, *swaps, *perm, *qnum; + idx_t higain, mincut, initcut, newcut, mincutorder; + real_t *invtvwgt, *ubfactors, *minbalv, *newbalv; + real_t origbal, minbal, newbal, rgain, ffactor; + rpq_t **queues; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + invtvwgt = graph->invtvwgt; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; + bndptr = graph->bndptr; + bndind = graph->bndind; + + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + qnum = iwspacemalloc(ctrl, nvtxs); + ubfactors = rwspacemalloc(ctrl, ncon); + newbalv = rwspacemalloc(ctrl, ncon); + minbalv = rwspacemalloc(ctrl, ncon); + + limit = gk_min(gk_max(0.01*nvtxs, 25), 150); + + + /* Determine a fudge factor to allow the refinement routines to get out + of tight balancing constraints. */ + ffactor = .5/gk_max(20, nvtxs); + + /* Initialize the queues */ + queues = (rpq_t **)wspacemalloc(ctrl, 2*ncon*sizeof(rpq_t *)); + for (i=0; i<2*ncon; i++) + queues[i] = rpqCreate(nvtxs); + for (i=0; ipijbm, ctrl->ubfactors, ubfactors); + for (i=0; i 0 ? 
ctrl->ubfactors[i]+ubfactors[i] : ctrl->ubfactors[i]); + + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + Print2WayRefineStats(ctrl, graph, ntpwgts, origbal, -2)); + + iset(nvtxs, -1, moved); + for (pass=0; passmincut; + + minbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, minbalv); + + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERT(CheckBnd(graph)); + + /* Insert boundary nodes in the priority queues */ + nbnd = graph->nbnd; + irandArrayPermute(nbnd, perm, nbnd/5, 1); + for (ii=0; ii 0 || id[i] == 0); + ASSERT(bndptr[i] != -1); + //rgain = 1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1); + //rgain = (ed[i]-id[i] > 0 ? 1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1) : ed[i]-id[i]); + rgain = ed[i]-id[i]; + rpqInsert(queues[2*qnum[i]+where[i]], i, rgain); + } + + for (nswaps=0; nswapspijbm, ubfactors, queues, &from, &cnum); + + to = (from+1)%2; + + if (from == -1 || (higain = rpqGetTop(queues[2*cnum+from])) == -1) + break; + ASSERT(bndptr[higain] != -1); + + newcut -= (ed[higain]-id[higain]); + + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1); + newbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, newbalv); + + if ((newcut < mincut && newbal <= ffactor) || + (newcut == mincut && (newbal < minbal || + (newbal == minbal && BetterBalance2Way(ncon, minbalv, newbalv))))) { + mincut = newcut; + minbal = newbal; + mincutorder = nswaps; + rcopy(ncon, newbalv, minbalv); + } + else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ + newcut += (ed[higain]-id[higain]); + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + break; + } + + where[higain] = to; + moved[higain] = nswaps; + swaps[nswaps] = higain; + + if (ctrl->dbglvl&METIS_DBG_MOVEINFO) { + printf("Moved%6"PRIDX" from %"PRIDX"(%"PRIDX") Gain:%5"PRIDX", " + "Cut:%5"PRIDX", NPwgts:", higain, from, cnum, 
ed[higain]-id[higain], newcut); + for (l=0; lpijbm), newbal); + } + + + /************************************************************** + * Update the id[i]/ed[i] values of the affected nodes + ***************************************************************/ + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + + for (j=xadj[higain]; j 0 ? + // 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]); + rgain = ed[k]-id[k]; + rpqUpdate(queues[2*qnum[k]+where[k]], k, rgain); + } + } + } + else { + if (ed[k] > 0) { /* It will now become a boundary vertex */ + BNDInsert(nbnd, bndind, bndptr, k); + if (moved[k] == -1) { + //rgain = 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1); + //rgain = (ed[k]-id[k] > 0 ? + // 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]); + rgain = ed[k]-id[k]; + rpqInsert(queues[2*qnum[k]+where[k]], k, rgain); + } + } + } + } + + } + + + /**************************************************************** + * Roll back computations + *****************************************************************/ + for (i=0; imincutorder; nswaps--) { + higain = swaps[nswaps]; + + to = where[higain] = (where[higain]+1)%2; + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + else if (ed[higain] > 0 && bndptr[higain] == -1) + BNDInsert(nbnd, bndind, bndptr, higain); + + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+((to+1)%2)*ncon, 1); + for (j=xadj[higain]; j 0) + BNDInsert(nbnd, bndind, bndptr, k); + } + } graph->mincut = mincut; - graph->nbnd = nbnd; + graph->nbnd = nbnd; - if (mincutorder == -1 || mincut == initcut) + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + Print2WayRefineStats(ctrl, graph, ntpwgts, minbal, mincutorder)); + + if (mincutorder <= 0 || mincut == initcut) break; } - PQueueFree(ctrl, 
&parts[0]); - PQueueFree(ctrl, &parts[1]); + for (i=0; i<2*ncon; i++) + rpqDestroy(queues[i]); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function selects the partition number and the queue from which + we will move vertices out. */ +/*************************************************************************/ +void SelectQueue(graph_t *graph, real_t *pijbm, real_t *ubfactors, + rpq_t **queues, idx_t *from, idx_t *cnum) +{ + idx_t ncon, i, part; + real_t max, tmp; + + ncon = graph->ncon; + + *from = -1; + *cnum = -1; + + /* First determine the side and the queue, irrespective of the presence of nodes. + The side & queue is determined based on the most violated balancing constraint. */ + for (max=0.0, part=0; part<2; part++) { + for (i=0; ipwgts[part*ncon+i]*pijbm[part*ncon+i] - ubfactors[i]; + /* the '=' in the test below is to ensure that under tight constraints + the partition that is at the max is selected */ + if (tmp >= max) { + max = tmp; + *from = part; + *cnum = i; + } + } + } - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); + if (*from != -1) { + /* in case the desired queue is empty, select a queue from the same side */ + if (rpqLength(queues[2*(*cnum)+(*from)]) == 0) { + for (i=0; i 0) { + max = graph->pwgts[(*from)*ncon+i]*pijbm[(*from)*ncon+i] - ubfactors[i]; + *cnum = i; + break; + } + } + + for (i++; ipwgts[(*from)*ncon+i]*pijbm[(*from)*ncon+i] - ubfactors[i]; + if (tmp > max && rpqLength(queues[2*i+(*from)]) > 0) { + max = tmp; + *cnum = i; + } + } + } + + /* + printf("Selected1 %"PRIDX"(%"PRIDX") -> %"PRIDX" [%5"PRREAL"]\n", + *from, *cnum, rpqLength(queues[2*(*cnum)+(*from)]), max); + */ + } + else { + /* the partitioning does not violate balancing constraints, in which case select + a queue based on cut criteria */ + for (part=0; part<2; part++) { + for (i=0; i 0 && + (*from == -1 || rpqSeeTopKey(queues[2*i+part]) > max)) { + max = 
rpqSeeTopKey(queues[2*i+part]); + *from = part; + *cnum = i; + } + } + } + /* + printf("Selected2 %"PRIDX"(%"PRIDX") -> %"PRIDX"\n", + *from, *cnum, rpqLength(queues[2*(*cnum)+(*from)]), max); + */ + } } +/*************************************************************************/ +/*! Prints statistics about the refinement */ +/*************************************************************************/ +void Print2WayRefineStats(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + real_t deltabal, idx_t mincutorder) +{ + int i; + + if (mincutorder == -2) { + printf("Parts: "); + printf("Nv-Nb[%5"PRIDX" %5"PRIDX"] ICut: %6"PRIDX, + graph->nvtxs, graph->nbnd, graph->mincut); + printf(" ["); + for (i=0; incon; i++) + printf("(%.3"PRREAL" %.3"PRREAL" T:%.3"PRREAL" %.3"PRREAL")", + graph->pwgts[i]*graph->invtvwgt[i], + graph->pwgts[graph->ncon+i]*graph->invtvwgt[i], + ntpwgts[i], ntpwgts[graph->ncon+i]); + printf("] LB: %.3"PRREAL"(%+.3"PRREAL")\n", + ComputeLoadImbalance(graph, 2, ctrl->pijbm), deltabal); + } + else { + printf("\tMincut: %6"PRIDX" at %5"PRIDX" NBND %6"PRIDX" NPwgts: [", + graph->mincut, mincutorder, graph->nbnd); + for (i=0; incon; i++) + printf("(%.3"PRREAL" %.3"PRREAL")", + graph->pwgts[i]*graph->invtvwgt[i], graph->pwgts[graph->ncon+i]*graph->invtvwgt[i]); + printf("] LB: %.3"PRREAL"(%+.3"PRREAL")\n", + ComputeLoadImbalance(graph, 2, ctrl->pijbm), deltabal); + } +} + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/fortran.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/fortran.c index 46ebefd9..fd033f0b 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/fortran.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/fortran.c @@ -8,52 +8,50 @@ * Started 8/19/97 * George * - * $Id: fortran.c,v 1.1 2003/07/16 15:55:02 karypis Exp $ - * */ -#include +#include "metislib.h" -/************************************************************************* -* This function changes the 
numbering to start from 0 instead of 1 -**************************************************************************/ -void Change2CNumbering(int nvtxs, idxtype *xadj, idxtype *adjncy) +/*************************************************************************/ +/*! This function changes the numbering to start from 0 instead of 1 */ +/*************************************************************************/ +void Change2CNumbering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy) { - int i, nedges; + idx_t i; for (i=0; i<=nvtxs; i++) xadj[i]--; - nedges = xadj[nvtxs]; - for (i=0; i - - -void METIS_PARTGRAPHRECURSIVE(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_PartGraphRecursive(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} -void metis_partgraphrecursive(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_PartGraphRecursive(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} -void metis_partgraphrecursive_(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_PartGraphRecursive(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} -void metis_partgraphrecursive__(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_PartGraphRecursive(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} - - -void METIS_WPARTGRAPHRECURSIVE(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int 
*options, int *edgecut, idxtype *part) -{ - METIS_WPartGraphRecursive(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, tpwgts, options, edgecut, part); -} -void metis_wpartgraphrecursive(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part) -{ - METIS_WPartGraphRecursive(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, tpwgts, options, edgecut, part); -} -void metis_wpartgraphrecursive_(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part) -{ - METIS_WPartGraphRecursive(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, tpwgts, options, edgecut, part); -} -void metis_wpartgraphrecursive__(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part) -{ - METIS_WPartGraphRecursive(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, tpwgts, options, edgecut, part); -} - - - -void METIS_PARTGRAPHKWAY(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_PartGraphKway(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} -void metis_partgraphkway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_PartGraphKway(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} -void metis_partgraphkway_(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_PartGraphKway(nvtxs, 
xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} -void metis_partgraphkway__(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_PartGraphKway(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} - - - -void METIS_WPARTGRAPHKWAY(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part) -{ - METIS_WPartGraphKway(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, tpwgts, options, edgecut, part); -} -void metis_wpartgraphkway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part) -{ - METIS_WPartGraphKway(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, tpwgts, options, edgecut, part); -} -void metis_wpartgraphkway_(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part) -{ - METIS_WPartGraphKway(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, tpwgts, options, edgecut, part); -} -void metis_wpartgraphkway__(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part) -{ - METIS_WPartGraphKway(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, tpwgts, options, edgecut, part); -} - - - -void METIS_EDGEND(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_EdgeND(nvtxs, xadj, adjncy, numflag, options, perm, iperm); -} -void metis_edgend(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype 
*perm, idxtype *iperm) -{ - METIS_EdgeND(nvtxs, xadj, adjncy, numflag, options, perm, iperm); -} -void metis_edgend_(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_EdgeND(nvtxs, xadj, adjncy, numflag, options, perm, iperm); -} -void metis_edgend__(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_EdgeND(nvtxs, xadj, adjncy, numflag, options, perm, iperm); -} - - - -void METIS_NODEND(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_NodeND(nvtxs, xadj, adjncy, numflag, options, perm, iperm); -} -void metis_nodend(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_NodeND(nvtxs, xadj, adjncy, numflag, options, perm, iperm); -} -void metis_nodend_(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_NodeND(nvtxs, xadj, adjncy, numflag, options, perm, iperm); -} -void metis_nodend__(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_NodeND(nvtxs, xadj, adjncy, numflag, options, perm, iperm); -} - - - -void METIS_NODEWND(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_NodeWND(nvtxs, xadj, adjncy, vwgt, numflag, options, perm, iperm); -} -void metis_nodewnd(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_NodeWND(nvtxs, xadj, adjncy, vwgt, numflag, options, perm, iperm); -} -void metis_nodewnd_(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_NodeWND(nvtxs, xadj, adjncy, vwgt, numflag, options, perm, iperm); -} -void metis_nodewnd__(int *nvtxs, idxtype *xadj, 
idxtype *adjncy, idxtype *vwgt, int *numflag, int *options, idxtype *perm, idxtype *iperm) -{ - METIS_NodeWND(nvtxs, xadj, adjncy, vwgt, numflag, options, perm, iperm); -} - - - -void METIS_PARTMESHNODAL(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart) -{ - METIS_PartMeshNodal(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart); -} -void metis_partmeshnodal(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart) -{ - METIS_PartMeshNodal(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart); -} -void metis_partmeshnodal_(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart) -{ - METIS_PartMeshNodal(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart); -} -void metis_partmeshnodal__(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart) -{ - METIS_PartMeshNodal(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart); -} - - -void METIS_PARTMESHDUAL(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart) -{ - METIS_PartMeshDual(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart); -} -void metis_partmeshdual(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart) -{ - METIS_PartMeshDual(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart); -} -void metis_partmeshdual_(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart) -{ - METIS_PartMeshDual(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart); -} -void metis_partmeshdual__(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart) -{ - 
METIS_PartMeshDual(ne, nn, elmnts, etype, numflag, nparts, edgecut, epart, npart); -} - - -void METIS_MESHTONODAL(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, idxtype *dxadj, idxtype *dadjncy) -{ - METIS_MeshToNodal(ne, nn, elmnts, etype, numflag, dxadj, dadjncy); -} -void metis_meshtonodal(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, idxtype *dxadj, idxtype *dadjncy) -{ - METIS_MeshToNodal(ne, nn, elmnts, etype, numflag, dxadj, dadjncy); -} -void metis_meshtonodal_(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, idxtype *dxadj, idxtype *dadjncy) -{ - METIS_MeshToNodal(ne, nn, elmnts, etype, numflag, dxadj, dadjncy); -} -void metis_meshtonodal__(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, idxtype *dxadj, idxtype *dadjncy) -{ - METIS_MeshToNodal(ne, nn, elmnts, etype, numflag, dxadj, dadjncy); -} - - -void METIS_MESHTODUAL(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, idxtype *dxadj, idxtype *dadjncy) -{ - METIS_MeshToDual(ne, nn, elmnts, etype, numflag, dxadj, dadjncy); -} -void metis_meshtodual(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, idxtype *dxadj, idxtype *dadjncy) -{ - METIS_MeshToDual(ne, nn, elmnts, etype, numflag, dxadj, dadjncy); -} -void metis_meshtodual_(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, idxtype *dxadj, idxtype *dadjncy) -{ - METIS_MeshToDual(ne, nn, elmnts, etype, numflag, dxadj, dadjncy); -} -void metis_meshtodual__(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, idxtype *dxadj, idxtype *dadjncy) -{ - METIS_MeshToDual(ne, nn, elmnts, etype, numflag, dxadj, dadjncy); -} - - -void METIS_ESTIMATEMEMORY(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *optype, int *nbytes) -{ - METIS_EstimateMemory(nvtxs, xadj, adjncy, numflag, optype, nbytes); -} -void metis_estimatememory(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *optype, int *nbytes) -{ - METIS_EstimateMemory(nvtxs, xadj, adjncy, 
numflag, optype, nbytes); -} -void metis_estimatememory_(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *optype, int *nbytes) -{ - METIS_EstimateMemory(nvtxs, xadj, adjncy, numflag, optype, nbytes); -} -void metis_estimatememory__(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *optype, int *nbytes) -{ - METIS_EstimateMemory(nvtxs, xadj, adjncy, numflag, optype, nbytes); -} - - - -void METIS_MCPARTGRAPHRECURSIVE(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_mCPartGraphRecursive(nvtxs, ncon, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} -void metis_mcpartgraphrecursive(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_mCPartGraphRecursive(nvtxs, ncon, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} -void metis_mcpartgraphrecursive_(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_mCPartGraphRecursive(nvtxs, ncon, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} -void metis_mcpartgraphrecursive__(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) -{ - METIS_mCPartGraphRecursive(nvtxs, ncon, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, part); -} - - -void METIS_MCPARTGRAPHKWAY(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *rubvec, int *options, int *edgecut, idxtype *part) -{ - METIS_mCPartGraphKway(nvtxs, ncon, 
xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, rubvec, options, edgecut, part); -} -void metis_mcpartgraphkway(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *rubvec, int *options, int *edgecut, idxtype *part) -{ - METIS_mCPartGraphKway(nvtxs, ncon, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, rubvec, options, edgecut, part); -} -void metis_mcpartgraphkway_(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *rubvec, int *options, int *edgecut, idxtype *part) -{ - METIS_mCPartGraphKway(nvtxs, ncon, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, rubvec, options, edgecut, part); -} -void metis_mcpartgraphkway__(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *rubvec, int *options, int *edgecut, idxtype *part) -{ - METIS_mCPartGraphKway(nvtxs, ncon, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, rubvec, options, edgecut, part); -} - - -void METIS_PARTGRAPHVKWAY(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int *wgtflag, int *numflag, int *nparts, int *options, int *volume, idxtype *part) -{ - METIS_PartGraphVKway(nvtxs, xadj, adjncy, vwgt, vsize, wgtflag, numflag, nparts, options, volume, part); -} -void metis_partgraphvkaway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int *wgtflag, int *numflag, int *nparts, int *options, int *volume, idxtype *part) -{ - METIS_PartGraphVKway(nvtxs, xadj, adjncy, vwgt, vsize, wgtflag, numflag, nparts, options, volume, part); -} -void metis_partgraphvkaway_(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int *wgtflag, int *numflag, int *nparts, int *options, int *volume, idxtype *part) -{ - METIS_PartGraphVKway(nvtxs, xadj, adjncy, vwgt, vsize, wgtflag, numflag, nparts, 
options, volume, part); -} -void metis_partgraphvkaway__(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int *wgtflag, int *numflag, int *nparts, int *options, int *volume, idxtype *part) -{ - METIS_PartGraphVKway(nvtxs, xadj, adjncy, vwgt, vsize, wgtflag, numflag, nparts, options, volume, part); -} - -void METIS_WPARTGRAPHVKWAY(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *volume, idxtype *part) -{ - METIS_WPartGraphVKway(nvtxs, xadj, adjncy, vwgt, vsize, wgtflag, numflag, nparts, tpwgts, options, volume, part); -} -void metis_wpartgraphvkaway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *volume, idxtype *part) -{ - METIS_WPartGraphVKway(nvtxs, xadj, adjncy, vwgt, vsize, wgtflag, numflag, nparts, tpwgts, options, volume, part); -} -void metis_wpartgraphvkaway_(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *volume, idxtype *part) -{ - METIS_WPartGraphVKway(nvtxs, xadj, adjncy, vwgt, vsize, wgtflag, numflag, nparts, tpwgts, options, volume, part); -} -void metis_wpartgraphvkaway__(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *volume, idxtype *part) -{ - METIS_WPartGraphVKway(nvtxs, xadj, adjncy, vwgt, vsize, wgtflag, numflag, nparts, tpwgts, options, volume, part); -} +#include "metislib.h" + +#define FRENAME(name, dargs, cargs, name1, name2, name3, name4) \ + int name1 dargs { return name cargs; } \ + int name2 dargs { return name cargs; } \ + int name3 dargs { return name cargs; } \ + int name4 dargs { return name cargs; } + + +FRENAME( + METIS_PartGraphRecursive, + (idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, idx_t 
*vwgt, + idx_t *vsize, idx_t *adjwgt, idx_t *nparts, real_t *tpwgts, + real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part), + (nvtxs, ncon, xadj, adjncy, vwgt, + vsize, adjwgt, nparts, tpwgts, + ubvec, options, edgecut, part), + METIS_PARTGRAPHRECURSIVE, + metis_partgraphrecursive, + metis_partgraphrecursive_, + metis_partgraphrecursive__ +) + + +FRENAME( + METIS_PartGraphKway, + (idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *vsize, idx_t *adjwgt, idx_t *nparts, real_t *tpwgts, + real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part), + (nvtxs, ncon, xadj, adjncy, vwgt, + vsize, adjwgt, nparts, tpwgts, + ubvec, options, edgecut, part), + METIS_PARTGRAPHKWAY, + metis_partgraphkway, + metis_partgraphkway_, + metis_partgraphkway__ +) + +FRENAME( + METIS_MeshToDual, + (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *ncommon, idx_t *numflag, + idx_t **r_xadj, idx_t **r_adjncy), + (ne, nn, eptr, eind, ncommon, numflag, r_xadj, r_adjncy), + METIS_MESHTODUAL, + metis_meshtodual, + metis_meshtodual_, + metis_meshtodual__ +) + + +FRENAME( + METIS_MeshToNodal, + (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *numflag, idx_t **r_xadj, + idx_t **r_adjncy), + (ne, nn, eptr, eind, numflag, r_xadj, r_adjncy), + METIS_MESHTONODAL, + metis_meshtonodal, + metis_meshtonodal_, + metis_meshtonodal__ +) + + +FRENAME( + METIS_PartMeshNodal, + (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *vwgt, idx_t *vsize, + idx_t *nparts, real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, + idx_t *npart), + (ne, nn, eptr, eind, vwgt, vsize, nparts, tpwgts, options, objval, epart, npart), + METIS_PARTMESHNODAL, + metis_partmeshnodal, + metis_partmeshnodal_, + metis_partmeshnodal__ +) + + +FRENAME( + METIS_PartMeshDual, + (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *vwgt, idx_t *vsize, + idx_t *ncommon, idx_t *nparts, real_t *tpwgts, idx_t *options, idx_t *objval, + idx_t *epart, idx_t *npart), + (ne, nn, eptr, eind, 
vwgt, vsize, ncommon, nparts, tpwgts, options, objval, epart, npart), + METIS_PARTMESHDUAL, + metis_partmeshdual, + metis_partmeshdual_, + metis_partmeshdual__ +) + + +FRENAME( + METIS_NodeND, + (idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *options, idx_t *perm, + idx_t *iperm), + (nvtxs, xadj, adjncy, vwgt, options, perm, iperm), + METIS_NODEND, + metis_nodend, + metis_nodend_, + metis_nodend__ +) + + +FRENAME( + METIS_Free, + (void *ptr), + (ptr), + METIS_FREE, + metis_free, + metis_free_, + metis_free__ +) + + +FRENAME( + METIS_SetDefaultOptions, + (idx_t *options), + (options), + METIS_SETDEFAULTOPTIONS, + metis_setdefaultoptions, + metis_setdefaultoptions_, + metis_setdefaultoptions__ +) + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib.c new file mode 100644 index 00000000..4e17eac4 --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib.c @@ -0,0 +1,120 @@ +/*! +\file gklib.c +\brief Various helper routines generated using GKlib's templates + +\date Started 4/12/2007 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: gklib.c 10395 2011-06-23 23:28:06Z karypis $ \endverbatim +*/ + + +#include "metislib.h" + + +/*************************************************************************/ +/*! BLAS routines */ +/*************************************************************************/ +GK_MKBLAS(i, idx_t, idx_t) +GK_MKBLAS(r, real_t, real_t) + +/*************************************************************************/ +/*! Memory allocation routines */ +/*************************************************************************/ +GK_MKALLOC(i, idx_t) +GK_MKALLOC(r, real_t) +GK_MKALLOC(ikv, ikv_t) +GK_MKALLOC(rkv, rkv_t) + +/*************************************************************************/ +/*! 
Priority queues routines */ +/*************************************************************************/ +#define key_gt(a, b) ((a) > (b)) +GK_MKPQUEUE(ipq, ipq_t, ikv_t, idx_t, idx_t, ikvmalloc, IDX_MAX, key_gt) +GK_MKPQUEUE(rpq, rpq_t, rkv_t, real_t, idx_t, rkvmalloc, REAL_MAX, key_gt) +#undef key_gt + +/*************************************************************************/ +/*! Random number generation routines */ +/*************************************************************************/ +GK_MKRANDOM(i, idx_t, idx_t) + +/*************************************************************************/ +/*! Utility routines */ +/*************************************************************************/ +GK_MKARRAY2CSR(i, idx_t) + +/*************************************************************************/ +/*! Sorting routines */ +/*************************************************************************/ +void isorti(size_t n, idx_t *base) +{ +#define i_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(idx_t, base, n, i_lt); +#undef i_lt +} + +void isortd(size_t n, idx_t *base) +{ +#define i_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(idx_t, base, n, i_gt); +#undef i_gt +} + +void rsorti(size_t n, real_t *base) +{ +#define r_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(real_t, base, n, r_lt); +#undef r_lt +} + +void rsortd(size_t n, real_t *base) +{ +#define r_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(real_t, base, n, r_gt); +#undef r_gt +} + +void ikvsorti(size_t n, ikv_t *base) +{ +#define ikey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(ikv_t, base, n, ikey_lt); +#undef ikey_lt +} + +/* Sorts based both on key and val */ +void ikvsortii(size_t n, ikv_t *base) +{ +#define ikeyval_lt(a, b) ((a)->key < (b)->key || ((a)->key == (b)->key && (a)->val < (b)->val)) + GK_MKQSORT(ikv_t, base, n, ikeyval_lt); +#undef ikeyval_lt +} + +void ikvsortd(size_t n, ikv_t *base) +{ +#define ikey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(ikv_t, base, n, ikey_gt); +#undef ikey_gt +} + +void rkvsorti(size_t n, rkv_t 
*base) +{ +#define rkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(rkv_t, base, n, rkey_lt); +#undef rkey_lt +} + +void rkvsortd(size_t n, rkv_t *base) +{ +#define rkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(rkv_t, base, n, rkey_gt); +#undef rkey_gt +} + +void uvwsorti(size_t n, uvw_t *base) +{ +#define uvwkey_lt(a, b) ((a)->u < (b)->u || ((a)->u == (b)->u && (a)->v < (b)->v)) + GK_MKQSORT(uvw_t, base, n, uvwkey_lt); +#undef uvwkey_lt +} + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib_defs.h b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib_defs.h new file mode 100644 index 00000000..dfac5ca6 --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib_defs.h @@ -0,0 +1,53 @@ +/*! +\file +\brief Data structures and prototypes for GKlib integration + +\date Started 12/23/2008 +\author George +\version\verbatim $Id: gklib_defs.h 10395 2011-06-23 23:28:06Z karypis $ \endverbatim +*/ + +#ifndef _LIBMETIS_GKLIB_H_ +#define _LIBMETIS_GKLIB_H_ + +#include "gklib_rename.h" + +/*************************************************************************/ +/*! Stores a weighted edge */ +/*************************************************************************/ +typedef struct { + idx_t u, v, w; /*!< Edge (u,v) with weight w */ +} uvw_t; + +/************************************************************************* +* Define various data structures using GKlib's templates. 
+**************************************************************************/ +GK_MKKEYVALUE_T(ikv_t, idx_t, idx_t) +GK_MKKEYVALUE_T(rkv_t, real_t, idx_t) +GK_MKPQUEUE_T(ipq_t, ikv_t) +GK_MKPQUEUE_T(rpq_t, rkv_t) + + +/* gklib.c */ +GK_MKBLAS_PROTO(i, idx_t, idx_t) +GK_MKBLAS_PROTO(r, real_t, real_t) +GK_MKALLOC_PROTO(i, idx_t) +GK_MKALLOC_PROTO(r, real_t) +GK_MKALLOC_PROTO(ikv, ikv_t) +GK_MKALLOC_PROTO(rkv, rkv_t) +GK_MKPQUEUE_PROTO(ipq, ipq_t, idx_t, idx_t) +GK_MKPQUEUE_PROTO(rpq, rpq_t, real_t, idx_t) +GK_MKRANDOM_PROTO(i, idx_t, idx_t) +GK_MKARRAY2CSR_PROTO(i, idx_t) +void isorti(size_t n, idx_t *base); +void isortd(size_t n, idx_t *base); +void rsorti(size_t n, real_t *base); +void rsortd(size_t n, real_t *base); +void ikvsorti(size_t n, ikv_t *base); +void ikvsortii(size_t n, ikv_t *base); +void ikvsortd(size_t n, ikv_t *base); +void rkvsorti(size_t n, rkv_t *base); +void rkvsortd(size_t n, rkv_t *base); +void uvwsorti(size_t n, uvw_t *base); + +#endif diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib_rename.h b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib_rename.h new file mode 100644 index 00000000..78dc8b39 --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/gklib_rename.h @@ -0,0 +1,122 @@ +/*! 
+\file + + * Copyright 1997, Regents of the University of Minnesota + * + * This file contains header files + * + * Started 10/2/97 + * George + * + * $Id: gklib_rename.h 10395 2011-06-23 23:28:06Z karypis $ + * + */ + + +#ifndef _LIBMETIS_GKLIB_RENAME_H_ +#define _LIBMETIS_GKLIB_RENAME_H_ + +/* gklib.c - generated from the .o files using the ./utils/listundescapedsumbols.csh */ +#define iAllocMatrix libmetis__iAllocMatrix +#define iFreeMatrix libmetis__iFreeMatrix +#define iSetMatrix libmetis__iSetMatrix +#define iargmax libmetis__iargmax +#define iargmax_n libmetis__iargmax_n +#define iargmin libmetis__iargmin +#define iarray2csr libmetis__iarray2csr +#define iaxpy libmetis__iaxpy +#define icopy libmetis__icopy +#define idot libmetis__idot +#define iincset libmetis__iincset +#define ikvAllocMatrix libmetis__ikvAllocMatrix +#define ikvFreeMatrix libmetis__ikvFreeMatrix +#define ikvSetMatrix libmetis__ikvSetMatrix +#define ikvcopy libmetis__ikvcopy +#define ikvmalloc libmetis__ikvmalloc +#define ikvrealloc libmetis__ikvrealloc +#define ikvset libmetis__ikvset +#define ikvsmalloc libmetis__ikvsmalloc +#define ikvsortd libmetis__ikvsortd +#define ikvsorti libmetis__ikvsorti +#define ikvsortii libmetis__ikvsortii +#define imalloc libmetis__imalloc +#define imax libmetis__imax +#define imin libmetis__imin +#define inorm2 libmetis__inorm2 +#define ipqCheckHeap libmetis__ipqCheckHeap +#define ipqCreate libmetis__ipqCreate +#define ipqDelete libmetis__ipqDelete +#define ipqDestroy libmetis__ipqDestroy +#define ipqFree libmetis__ipqFree +#define ipqGetTop libmetis__ipqGetTop +#define ipqInit libmetis__ipqInit +#define ipqInsert libmetis__ipqInsert +#define ipqLength libmetis__ipqLength +#define ipqReset libmetis__ipqReset +#define ipqSeeKey libmetis__ipqSeeKey +#define ipqSeeTopKey libmetis__ipqSeeTopKey +#define ipqSeeTopVal libmetis__ipqSeeTopVal +#define ipqUpdate libmetis__ipqUpdate +#define isrand libmetis__isrand +#define irand libmetis__irand +#define 
irandArrayPermute libmetis__irandArrayPermute +#define irandArrayPermuteFine libmetis__irandArrayPermuteFine +#define irandInRange libmetis__irandInRange +#define irealloc libmetis__irealloc +#define iscale libmetis__iscale +#define iset libmetis__iset +#define ismalloc libmetis__ismalloc +#define isortd libmetis__isortd +#define isorti libmetis__isorti +#define isrand libmetis__isrand +#define isum libmetis__isum +#define rAllocMatrix libmetis__rAllocMatrix +#define rFreeMatrix libmetis__rFreeMatrix +#define rSetMatrix libmetis__rSetMatrix +#define rargmax libmetis__rargmax +#define rargmax_n libmetis__rargmax_n +#define rargmin libmetis__rargmin +#define raxpy libmetis__raxpy +#define rcopy libmetis__rcopy +#define rdot libmetis__rdot +#define rincset libmetis__rincset +#define rkvAllocMatrix libmetis__rkvAllocMatrix +#define rkvFreeMatrix libmetis__rkvFreeMatrix +#define rkvSetMatrix libmetis__rkvSetMatrix +#define rkvcopy libmetis__rkvcopy +#define rkvmalloc libmetis__rkvmalloc +#define rkvrealloc libmetis__rkvrealloc +#define rkvset libmetis__rkvset +#define rkvsmalloc libmetis__rkvsmalloc +#define rkvsortd libmetis__rkvsortd +#define rkvsorti libmetis__rkvsorti +#define rmalloc libmetis__rmalloc +#define rmax libmetis__rmax +#define rmin libmetis__rmin +#define rnorm2 libmetis__rnorm2 +#define rpqCheckHeap libmetis__rpqCheckHeap +#define rpqCreate libmetis__rpqCreate +#define rpqDelete libmetis__rpqDelete +#define rpqDestroy libmetis__rpqDestroy +#define rpqFree libmetis__rpqFree +#define rpqGetTop libmetis__rpqGetTop +#define rpqInit libmetis__rpqInit +#define rpqInsert libmetis__rpqInsert +#define rpqLength libmetis__rpqLength +#define rpqReset libmetis__rpqReset +#define rpqSeeKey libmetis__rpqSeeKey +#define rpqSeeTopKey libmetis__rpqSeeTopKey +#define rpqSeeTopVal libmetis__rpqSeeTopVal +#define rpqUpdate libmetis__rpqUpdate +#define rrealloc libmetis__rrealloc +#define rscale libmetis__rscale +#define rset libmetis__rset +#define rsmalloc 
libmetis__rsmalloc +#define rsortd libmetis__rsortd +#define rsorti libmetis__rsorti +#define rsum libmetis__rsum +#define uvwsorti libmetis__uvwsorti + +#endif + + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/graph.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/graph.c index 9a93784b..efb988e0 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/graph.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/graph.c @@ -1,616 +1,433 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * graph.c - * - * This file contains functions that deal with setting up the graphs - * for METIS. - * - * Started 7/25/97 - * George - * - * $Id: graph.c,v 1.2 2003/07/31 06:14:01 karypis Exp $ - * - */ - -#include - -/************************************************************************* -* This function sets up the graph from the user input -**************************************************************************/ -void SetUpGraph(GraphType *graph, int OpType, int nvtxs, int ncon, - idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int wgtflag) -{ - int i, j, k, sum, gsize; - float *nvwgt; - idxtype tvwgt[MAXNCON]; +/** +\file +\brief Functions that deal with setting up the graphs for METIS. 
- if (OpType == OP_KMETIS && ncon == 1 && (wgtflag&2) == 0 && (wgtflag&1) == 0) { - SetUpGraphKway(graph, nvtxs, xadj, adjncy); - return; - } +\date Started 7/25/1997 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: graph.c 15817 2013-11-25 14:58:41Z karypis $ \endverbatim +*/ - InitGraph(graph); +#include "metislib.h" - graph->nvtxs = nvtxs; - graph->nedges = xadj[nvtxs]; - graph->ncon = ncon; - graph->xadj = xadj; - graph->adjncy = adjncy; - - if (ncon == 1) { /* We are in the non mC mode */ - gsize = 0; - if ((wgtflag&2) == 0) - gsize += nvtxs; - if ((wgtflag&1) == 0) - gsize += graph->nedges; - - gsize += 2*nvtxs; - - graph->gdata = idxmalloc(gsize, "SetUpGraph: gdata"); - - /* Create the vertex/edge weight vectors if they are not supplied */ - gsize = 0; - if ((wgtflag&2) == 0) { - vwgt = graph->vwgt = idxset(nvtxs, 1, graph->gdata); - gsize += nvtxs; - } - else - graph->vwgt = vwgt; - if ((wgtflag&1) == 0) { - adjwgt = graph->adjwgt = idxset(graph->nedges, 1, graph->gdata+gsize); - gsize += graph->nedges; - } - else - graph->adjwgt = adjwgt; +/*************************************************************************/ +/*! 
This function sets up the graph from the user input */ +/*************************************************************************/ +graph_t *SetupGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt) +{ + idx_t i, j, k, sum; + real_t *nvwgt; + graph_t *graph; + /* allocate the graph and fill in the fields */ + graph = CreateGraph(); - /* Compute the initial values of the adjwgtsum */ - graph->adjwgtsum = graph->gdata + gsize; - gsize += nvtxs; + graph->nvtxs = nvtxs; + graph->nedges = xadj[nvtxs]; + graph->ncon = ncon; - for (i=0; iadjwgtsum[i] = sum; - } + graph->xadj = xadj; + graph->free_xadj = 0; - graph->cmap = graph->gdata + gsize; - gsize += nvtxs; + graph->adjncy = adjncy; + graph->free_adjncy = 0; - } - else { /* Set up the graph in MOC mode */ - gsize = 0; - if ((wgtflag&1) == 0) - gsize += graph->nedges; + graph->droppedewgt = 0; - gsize += 2*nvtxs; + /* setup the vertex weights */ + if (vwgt) { + graph->vwgt = vwgt; + graph->free_vwgt = 0; + } + else { + vwgt = graph->vwgt = ismalloc(ncon*nvtxs, 1, "SetupGraph: vwgt"); + } - graph->gdata = idxmalloc(gsize, "SetUpGraph: gdata"); - gsize = 0; + graph->tvwgt = imalloc(ncon, "SetupGraph: tvwgts"); + graph->invtvwgt = rmalloc(ncon, "SetupGraph: invtvwgts"); + for (i=0; itvwgt[i] = isum(nvtxs, vwgt+i, ncon); + graph->invtvwgt[i] = 1.0/(graph->tvwgt[i] > 0 ? 
graph->tvwgt[i] : 1); + } - for (i=0; invwgt = fmalloc(ncon*nvtxs, "SetUpGraph: nvwgt"); - for (i=0; iobjtype == METIS_OBJTYPE_VOL) { + /* Setup the vsize */ + if (vsize) { + graph->vsize = vsize; + graph->free_vsize = 0; } - - - /* Create the edge weight vectors if they are not supplied */ - if ((wgtflag&1) == 0) { - adjwgt = graph->adjwgt = idxset(graph->nedges, 1, graph->gdata+gsize); - gsize += graph->nedges; + else { + vsize = graph->vsize = ismalloc(nvtxs, 1, "SetupGraph: vsize"); } - else - graph->adjwgt = adjwgt; - - /* Compute the initial values of the adjwgtsum */ - graph->adjwgtsum = graph->gdata + gsize; - gsize += nvtxs; + /* Allocate memory for edge weights and initialize them to the sum of the vsize */ + adjwgt = graph->adjwgt = imalloc(graph->nedges, "SetupGraph: adjwgt"); for (i=0; iadjwgtsum[i] = sum; + adjwgt[j] = 1+vsize[i]+vsize[adjncy[j]]; + } + } + else { /* For edgecut minimization */ + /* setup the edge weights */ + if (adjwgt) { + graph->adjwgt = adjwgt; + graph->free_adjwgt = 0; + } + else { + adjwgt = graph->adjwgt = ismalloc(graph->nedges, 1, "SetupGraph: adjwgt"); } + } - graph->cmap = graph->gdata + gsize; - gsize += nvtxs; - } + /* setup various derived info */ + SetupGraph_tvwgt(graph); - if (OpType != OP_KMETIS && OpType != OP_KVMETIS) { - graph->label = idxmalloc(nvtxs, "SetUpGraph: label"); + if (ctrl->optype == METIS_OP_PMETIS || ctrl->optype == METIS_OP_OMETIS) + SetupGraph_label(graph); - for (i=0; ilabel[i] = i; - } + ASSERT(CheckGraph(graph, ctrl->numflag, 1)); + return graph; } -/************************************************************************* -* This function sets up the graph from the user input -**************************************************************************/ -void SetUpGraphKway(GraphType *graph, int nvtxs, idxtype *xadj, idxtype *adjncy) +/*************************************************************************/ +/*! 
Sets up the tvwgt/invtvwgt info */ +/*************************************************************************/ +void SetupGraph_tvwgt(graph_t *graph) { - int i; - - InitGraph(graph); - - graph->nvtxs = nvtxs; - graph->nedges = xadj[nvtxs]; - graph->ncon = 1; - graph->xadj = xadj; - graph->vwgt = NULL; - graph->adjncy = adjncy; - graph->adjwgt = NULL; - - graph->gdata = idxmalloc(2*nvtxs, "SetUpGraph: gdata"); - graph->adjwgtsum = graph->gdata; - graph->cmap = graph->gdata + nvtxs; + idx_t i; - /* Compute the initial values of the adjwgtsum */ - for (i=0; iadjwgtsum[i] = xadj[i+1]-xadj[i]; + if (graph->tvwgt == NULL) + graph->tvwgt = imalloc(graph->ncon, "SetupGraph_tvwgt: tvwgt"); + if (graph->invtvwgt == NULL) + graph->invtvwgt = rmalloc(graph->ncon, "SetupGraph_tvwgt: invtvwgt"); + for (i=0; incon; i++) { + graph->tvwgt[i] = isum(graph->nvtxs, graph->vwgt+i, graph->ncon); + graph->invtvwgt[i] = 1.0/(graph->tvwgt[i] > 0 ? graph->tvwgt[i] : 1); + } } - -/************************************************************************* -* This function sets up the graph from the user input -**************************************************************************/ -void SetUpGraph2(GraphType *graph, int nvtxs, int ncon, idxtype *xadj, - idxtype *adjncy, float *nvwgt, idxtype *adjwgt) +/*************************************************************************/ +/*! 
Sets up the label info */ +/*************************************************************************/ +void SetupGraph_label(graph_t *graph) { - int i, j, sum; - - InitGraph(graph); + idx_t i; - graph->nvtxs = nvtxs; - graph->nedges = xadj[nvtxs]; - graph->ncon = ncon; - graph->xadj = xadj; - graph->adjncy = adjncy; - graph->adjwgt = adjwgt; - - graph->nvwgt = fmalloc(nvtxs*ncon, "SetUpGraph2: graph->nvwgt"); - scopy(nvtxs*ncon, nvwgt, graph->nvwgt); - - graph->gdata = idxmalloc(2*nvtxs, "SetUpGraph: gdata"); - - /* Compute the initial values of the adjwgtsum */ - graph->adjwgtsum = graph->gdata; - for (i=0; iadjwgtsum[i] = sum; - } + if (graph->label == NULL) + graph->label = imalloc(graph->nvtxs, "SetupGraph_label: label"); - graph->cmap = graph->gdata+nvtxs; - - graph->label = idxmalloc(nvtxs, "SetUpGraph: label"); - for (i=0; invtxs; i++) graph->label[i] = i; - } -/************************************************************************* -* This function sets up the graph from the user input -**************************************************************************/ -void VolSetUpGraph(GraphType *graph, int OpType, int nvtxs, int ncon, idxtype *xadj, - idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int wgtflag) +/*************************************************************************/ +/*! 
Setup the various arrays for the split graph */ +/*************************************************************************/ +graph_t *SetupSplitGraph(graph_t *graph, idx_t snvtxs, idx_t snedges) { - int i, j, k, sum, gsize; - idxtype *adjwgt; - float *nvwgt; - idxtype tvwgt[MAXNCON]; + graph_t *sgraph; - InitGraph(graph); + sgraph = CreateGraph(); - graph->nvtxs = nvtxs; - graph->nedges = xadj[nvtxs]; - graph->ncon = ncon; - graph->xadj = xadj; - graph->adjncy = adjncy; - - if (ncon == 1) { /* We are in the non mC mode */ - gsize = graph->nedges; /* This is for the edge weights */ - if ((wgtflag&2) == 0) - gsize += nvtxs; /* vwgts */ - if ((wgtflag&1) == 0) - gsize += nvtxs; /* vsize */ - - gsize += 2*nvtxs; - - graph->gdata = idxmalloc(gsize, "SetUpGraph: gdata"); - - /* Create the vertex/edge weight vectors if they are not supplied */ - gsize = 0; - if ((wgtflag&2) == 0) { - vwgt = graph->vwgt = idxset(nvtxs, 1, graph->gdata); - gsize += nvtxs; - } - else - graph->vwgt = vwgt; + sgraph->nvtxs = snvtxs; + sgraph->nedges = snedges; + sgraph->ncon = graph->ncon; - if ((wgtflag&1) == 0) { - vsize = graph->vsize = idxset(nvtxs, 1, graph->gdata); - gsize += nvtxs; - } - else - graph->vsize = vsize; + /* Allocate memory for the split graph */ + sgraph->xadj = imalloc(snvtxs+1, "SetupSplitGraph: xadj"); + sgraph->vwgt = imalloc(sgraph->ncon*snvtxs, "SetupSplitGraph: vwgt"); + sgraph->adjncy = imalloc(snedges, "SetupSplitGraph: adjncy"); + sgraph->adjwgt = imalloc(snedges, "SetupSplitGraph: adjwgt"); + sgraph->label = imalloc(snvtxs, "SetupSplitGraph: label"); + sgraph->tvwgt = imalloc(sgraph->ncon, "SetupSplitGraph: tvwgt"); + sgraph->invtvwgt = rmalloc(sgraph->ncon, "SetupSplitGraph: invtvwgt"); - /* Allocate memory for edge weights and initialize them to the sum of the vsize */ - adjwgt = graph->adjwgt = graph->gdata+gsize; - gsize += graph->nedges; + if (graph->vsize) + sgraph->vsize = imalloc(snvtxs, "SetupSplitGraph: vsize"); - for (i=0; iadjwgtsum = graph->gdata + 
gsize; - gsize += nvtxs; +/*************************************************************************/ +/*! This function creates and initializes a graph_t data structure */ +/*************************************************************************/ +graph_t *CreateGraph(void) +{ + graph_t *graph; - for (i=0; iadjwgtsum[i] = sum; - } + graph = (graph_t *)gk_malloc(sizeof(graph_t), "CreateGraph: graph"); - graph->cmap = graph->gdata + gsize; - gsize += nvtxs; + InitGraph(graph); - } - else { /* Set up the graph in MOC mode */ - gsize = graph->nedges; - if ((wgtflag&1) == 0) - gsize += nvtxs; + return graph; +} - gsize += 2*nvtxs; - graph->gdata = idxmalloc(gsize, "SetUpGraph: gdata"); - gsize = 0; +/*************************************************************************/ +/*! This function initializes a graph_t data structure */ +/*************************************************************************/ +void InitGraph(graph_t *graph) +{ + memset((void *)graph, 0, sizeof(graph_t)); + + /* graph size constants */ + graph->nvtxs = -1; + graph->nedges = -1; + graph->ncon = -1; + graph->mincut = -1; + graph->minvol = -1; + graph->nbnd = -1; + + /* memory for the graph structure */ + graph->xadj = NULL; + graph->vwgt = NULL; + graph->vsize = NULL; + graph->adjncy = NULL; + graph->adjwgt = NULL; + graph->label = NULL; + graph->cmap = NULL; + graph->tvwgt = NULL; + graph->invtvwgt = NULL; + + /* by default these are set to true, but they can be explicitly changed afterwards */ + graph->free_xadj = 1; + graph->free_vwgt = 1; + graph->free_vsize = 1; + graph->free_adjncy = 1; + graph->free_adjwgt = 1; + + + /* memory for the partition/refinement structure */ + graph->where = NULL; + graph->pwgts = NULL; + graph->id = NULL; + graph->ed = NULL; + graph->bndptr = NULL; + graph->bndind = NULL; + graph->nrinfo = NULL; + graph->ckrinfo = NULL; + graph->vkrinfo = NULL; + + /* linked-list structure */ + graph->coarser = NULL; + graph->finer = NULL; - /* Create the normalized 
vertex weights along each constrain */ - if ((wgtflag&2) == 0) - vwgt = idxsmalloc(nvtxs, 1, "SetUpGraph: vwgt"); +} - for (i=0; invwgt = fmalloc(ncon*nvtxs, "SetUpGraph: nvwgt"); - for (i=0; ifree_xadj) + gk_free((void **)&graph->xadj, LTERM); + if (graph->free_vwgt) + gk_free((void **)&graph->vwgt, LTERM); + if (graph->free_vsize) + gk_free((void **)&graph->vsize, LTERM); + if (graph->free_adjncy) + gk_free((void **)&graph->adjncy, LTERM); + if (graph->free_adjwgt) + gk_free((void **)&graph->adjwgt, LTERM); +} - /* Create the vsize vector if it is not supplied */ - if ((wgtflag&1) == 0) { - vsize = graph->vsize = idxset(nvtxs, 1, graph->gdata); - gsize += nvtxs; - } - else - graph->vsize = vsize; +/*************************************************************************/ +/*! This function frees the refinement/partition memory stored in a graph */ +/*************************************************************************/ +void FreeRData(graph_t *graph) +{ - /* Allocate memory for edge weights and initialize them to the sum of the vsize */ - adjwgt = graph->adjwgt = graph->gdata+gsize; - gsize += graph->nedges; + /* The following is for the -minconn and -contig to work properly in + the vol-refinement routines */ + if ((void *)graph->ckrinfo == (void *)graph->vkrinfo) + graph->ckrinfo = NULL; - for (i=0; iadjwgtsum = graph->gdata + gsize; - gsize += nvtxs; + /* free partition/refinement structure */ + gk_free((void **)&graph->where, &graph->pwgts, &graph->id, &graph->ed, + &graph->bndptr, &graph->bndind, &graph->nrinfo, &graph->ckrinfo, + &graph->vkrinfo, LTERM); +} - for (i=0; iadjwgtsum[i] = sum; - } - graph->cmap = graph->gdata + gsize; - gsize += nvtxs; +/*************************************************************************/ +/*! 
This function deallocates any memory stored in a graph */ +/*************************************************************************/ +void FreeGraph(graph_t **r_graph) +{ + graph_t *graph; - } + graph = *r_graph; - if (OpType != OP_KVMETIS) { - graph->label = idxmalloc(nvtxs, "SetUpGraph: label"); + /* free the graph structure's fields */ + FreeSData(graph); - for (i=0; ilabel[i] = i; - } + /* free the partition/refinement fields */ + FreeRData(graph); + + gk_free((void **)&graph->tvwgt, &graph->invtvwgt, &graph->label, + &graph->cmap, &graph, LTERM); + *r_graph = NULL; } -/************************************************************************* -* This function randomly permutes the adjacency lists of a graph -**************************************************************************/ -void RandomizeGraph(GraphType *graph) +/*************************************************************************/ +/*! This function writes the key contents of the graph on disk and frees + the associated memory */ +/*************************************************************************/ +void graph_WriteToDisk(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, l, tmp, nvtxs; - idxtype *xadj, *adjncy, *adjwgt; + idx_t nvtxs, ncon, *xadj; + static int gID = 1; + char outfile[1024]; + FILE *fpout; - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - for (i=0; iondisk == 0) + return; + + if (sizeof(idx_t)*(graph->nvtxs*(graph->ncon+1)+2*graph->xadj[graph->nvtxs]) < 128*1024*1024) + return; + + if (graph->gID > 0) { + sprintf(outfile, "metis%d.%d", (int)ctrl->pid, graph->gID); + gk_rmpath(outfile); } -} + graph->gID = gID++; + sprintf(outfile, "metis%d.%d", (int)ctrl->pid, graph->gID); -/************************************************************************* -* This function checks whether or not partition pid is contigous -**************************************************************************/ -int IsConnectedSubdomain(CtrlType 
*ctrl, GraphType *graph, int pid, int report) -{ - int i, j, k, nvtxs, first, last, nleft, ncmps, wgt; - idxtype *xadj, *adjncy, *where, *touched, *queue; - idxtype *cptr; + if ((fpout = fopen(outfile, "wb")) == NULL) + return; nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - where = graph->where; - - touched = idxsmalloc(nvtxs, 0, "IsConnected: touched"); - queue = idxmalloc(nvtxs, "IsConnected: queue"); - cptr = idxmalloc(nvtxs+1, "IsConnected: cptr"); + ncon = graph->ncon; + xadj = graph->xadj; - nleft = 0; - for (i=0; ifree_xadj) { + if (fwrite(graph->xadj, sizeof(idx_t), nvtxs+1, fpout) != nvtxs+1) + goto error; } - - for (i=0; ifree_vwgt) { + if (fwrite(graph->vwgt, sizeof(idx_t), nvtxs*ncon, fpout) != nvtxs*ncon) + goto error; } - - touched[i] = 1; - queue[0] = i; - first = 0; last = 1; - - cptr[0] = 0; /* This actually points to queue */ - ncmps = 0; - while (first != nleft) { - if (first == last) { /* Find another starting vertex */ - cptr[++ncmps] = first; - for (i=0; ifree_adjncy) { + if (fwrite(graph->adjncy, sizeof(idx_t), xadj[nvtxs], fpout) != xadj[nvtxs]) + goto error; + } + if (graph->free_adjwgt) { + if (fwrite(graph->adjwgt, sizeof(idx_t), xadj[nvtxs], fpout) != xadj[nvtxs]) + goto error; } - cptr[++ncmps] = first; - - if (ncmps > 1 && report) { - printf("The graph has %d connected components in partition %d:\t", ncmps, pid); - for (i=0; ivwgt[queue[j]]; - printf("[%5d %5d] ", cptr[i+1]-cptr[i], wgt); - /* - if (cptr[i+1]-cptr[i] == 1) - printf("[%d %d] ", queue[cptr[i]], xadj[queue[cptr[i]]+1]-xadj[queue[cptr[i]]]); - */ + if (ctrl->objtype == METIS_OBJTYPE_VOL) { + if (graph->free_vsize) { + if (fwrite(graph->vsize, sizeof(idx_t), nvtxs, fpout) != nvtxs) + goto error; } - printf("\n"); } - GKfree(&touched, &queue, &cptr, LTERM); - - return (ncmps == 1 ? 
1 : 0); + fclose(fpout); + + if (graph->free_xadj) + gk_free((void **)&graph->xadj, LTERM); + if (graph->free_vwgt) + gk_free((void **)&graph->vwgt, LTERM); + if (graph->free_vsize) + gk_free((void **)&graph->vsize, LTERM); + if (graph->free_adjncy) + gk_free((void **)&graph->adjncy, LTERM); + if (graph->free_adjwgt) + gk_free((void **)&graph->adjwgt, LTERM); + + graph->ondisk = 1; + return; + +error: + printf("Failed on writing %s\n", outfile); + fclose(fpout); + gk_rmpath(outfile); + graph->ondisk = 0; } -/************************************************************************* -* This function checks whether a graph is contigous or not -**************************************************************************/ -int IsConnected(CtrlType *ctrl, GraphType *graph, int report) +/*************************************************************************/ +/*! This function reads the key contents of a graph from the disk */ +/*************************************************************************/ +void graph_ReadFromDisk(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, nvtxs, first, last; - idxtype *xadj, *adjncy, *touched, *queue; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - - touched = idxsmalloc(nvtxs, 0, "IsConnected: touched"); - queue = idxmalloc(nvtxs, "IsConnected: queue"); - - touched[0] = 1; - queue[0] = 0; - first = 0; last = 1; - - while (first < last) { - i = queue[first++]; - for (j=xadj[i]; jondisk == 0) + return; /* this graph is not on the disk */ + sprintf(infile, "metis%d.%d", (int)ctrl->pid, graph->gID); -/************************************************************************* -* This function checks whether or not partition pid is contigous -**************************************************************************/ -int IsConnected2(GraphType *graph, int report) -{ - int i, j, k, nvtxs, first, last, nleft, ncmps, wgt; - idxtype *xadj, *adjncy, *where, *touched, *queue; - idxtype *cptr; + if ((fpin = fopen(infile, 
"rb")) == NULL) + return; nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - where = graph->where; - - touched = idxsmalloc(nvtxs, 0, "IsConnected: touched"); - queue = idxmalloc(nvtxs, "IsConnected: queue"); - cptr = idxmalloc(nvtxs+1, "IsConnected: cptr"); - - nleft = nvtxs; - touched[0] = 1; - queue[0] = 0; - first = 0; last = 1; - - cptr[0] = 0; /* This actually points to queue */ - ncmps = 0; - while (first != nleft) { - if (first == last) { /* Find another starting vertex */ - cptr[++ncmps] = first; - for (i=0; incon; - i = queue[first++]; - for (j=xadj[i]; j 1 && report) { - printf("%d connected components:\t", ncmps); - for (i=0; i 200) - printf("[%5d] ", cptr[i+1]-cptr[i]); - } - printf("\n"); + if (graph->free_xadj) { + graph->xadj = imalloc(nvtxs+1, "graph_ReadFromDisk: xadj"); + if (fread(graph->xadj, sizeof(idx_t), nvtxs+1, fpin) != nvtxs+1) + goto error; } - - GKfree(&touched, &queue, &cptr, LTERM); - - return (ncmps == 1 ? 1 : 0); -} - - -/************************************************************************* -* This function returns the number of connected components in cptr,cind -* The separator of the graph is used to split it and then find its components. 
-**************************************************************************/ -int FindComponents(CtrlType *ctrl, GraphType *graph, idxtype *cptr, idxtype *cind) -{ - int i, j, k, nvtxs, first, last, nleft, ncmps, wgt; - idxtype *xadj, *adjncy, *where, *touched, *queue; - - nvtxs = graph->nvtxs; xadj = graph->xadj; - adjncy = graph->adjncy; - where = graph->where; - touched = idxsmalloc(nvtxs, 0, "IsConnected: queue"); - - for (i=0; inbnd; i++) - touched[graph->bndind[i]] = 1; - - queue = cind; - - nleft = 0; - for (i=0; ifree_vwgt) { + graph->vwgt = imalloc(nvtxs*ncon, "graph_ReadFromDisk: vwgt"); + if (fread(graph->vwgt, sizeof(idx_t), nvtxs*ncon, fpin) != nvtxs*ncon) + goto error; } - for (i=0; ifree_adjncy) { + graph->adjncy = imalloc(xadj[nvtxs], "graph_ReadFromDisk: adjncy"); + if (fread(graph->adjncy, sizeof(idx_t), xadj[nvtxs], fpin) != xadj[nvtxs]) + goto error; } - touched[i] = 1; - queue[0] = i; - first = 0; last = 1; - - cptr[0] = 0; /* This actually points to queue */ - ncmps = 0; - while (first != nleft) { - if (first == last) { /* Find another starting vertex */ - cptr[++ncmps] = first; - for (i=0; ifree_adjwgt) { + graph->adjwgt = imalloc(xadj[nvtxs], "graph_ReadFromDisk: adjwgt"); + if (fread(graph->adjwgt, sizeof(idx_t), xadj[nvtxs], fpin) != xadj[nvtxs]) + goto error; + } - i = queue[first++]; - for (j=xadj[i]; jobjtype == METIS_OBJTYPE_VOL) { + if (graph->free_vsize) { + graph->vsize = imalloc(nvtxs, "graph_ReadFromDisk: vsize"); + if (fread(graph->vsize, sizeof(idx_t), nvtxs, fpin) != nvtxs) + goto error; } } - cptr[++ncmps] = first; - - free(touched); - return ncmps; -} + fclose(fpin); +// printf("ondisk: deleting %s\n", infile); + gk_rmpath(infile); + graph->gID = 0; + graph->ondisk = 0; + return; +error: + fclose(fpin); + gk_rmpath(infile); + graph->ondisk = 0; + gk_errexit(SIGERR, "Failed to restore graph %s from the disk.\n", infile); +} diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/initpart.c 
b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/initpart.c index a3c8570e..04c7cb48 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/initpart.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/initpart.c @@ -9,119 +9,223 @@ * Started 7/23/97 * George * - * $Id: initpart.c,v 1.2 2003/07/31 16:23:29 karypis Exp $ - * */ -#include +#include "metislib.h" -/************************************************************************* -* This function computes the initial bisection of the coarsest graph -**************************************************************************/ -void Init2WayPartition(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor) +/*************************************************************************/ +/*! This function computes the initial bisection of the coarsest graph */ +/*************************************************************************/ +void Init2WayPartition(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) { - int dbglvl; + mdbglvl_et dbglvl; + + ASSERT(graph->tvwgt[0] >= 0); dbglvl = ctrl->dbglvl; - IFSET(ctrl->dbglvl, DBG_REFINE, ctrl->dbglvl -= DBG_REFINE); - IFSET(ctrl->dbglvl, DBG_MOVEINFO, ctrl->dbglvl -= DBG_MOVEINFO); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, ctrl->dbglvl -= METIS_DBG_REFINE); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, ctrl->dbglvl -= METIS_DBG_MOVEINFO); - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->InitPartTmr)); - switch (ctrl->IType) { - case IPART_GGPKL: - if (graph->nedges == 0) - RandomBisection(ctrl, graph, tpwgts, ubfactor); + switch (ctrl->iptype) { + case METIS_IPTYPE_RANDOM: + if (graph->ncon == 1) + RandomBisection(ctrl, graph, ntpwgts, niparts); else - GrowBisection(ctrl, graph, tpwgts, ubfactor); + McRandomBisection(ctrl, graph, ntpwgts, niparts); break; - case 3: - RandomBisection(ctrl, graph, tpwgts, ubfactor); + + case 
METIS_IPTYPE_GROW: + if (graph->nedges == 0) + if (graph->ncon == 1) + RandomBisection(ctrl, graph, ntpwgts, niparts); + else + McRandomBisection(ctrl, graph, ntpwgts, niparts); + else + if (graph->ncon == 1) + GrowBisection(ctrl, graph, ntpwgts, niparts); + else + McGrowBisection(ctrl, graph, ntpwgts, niparts); break; + default: - errexit("Unknown initial partition type: %d\n", ctrl->IType); + gk_errexit(SIGERR, "Unknown initial partition type: %d\n", ctrl->iptype); } - IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial Cut: %d\n", graph->mincut)); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_IPART, printf("Initial Cut: %"PRIDX"\n", graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->InitPartTmr)); ctrl->dbglvl = dbglvl; -/* - IsConnectedSubdomain(ctrl, graph, 0); - IsConnectedSubdomain(ctrl, graph, 1); -*/ } -/************************************************************************* -* This function computes the initial bisection of the coarsest graph -**************************************************************************/ -void InitSeparator(CtrlType *ctrl, GraphType *graph, float ubfactor) + +/*************************************************************************/ +/*! 
This function computes the initial separator of the coarsest graph */ +/*************************************************************************/ +void InitSeparator(ctrl_t *ctrl, graph_t *graph, idx_t niparts) { - int dbglvl; + real_t ntpwgts[2] = {0.5, 0.5}; + mdbglvl_et dbglvl; dbglvl = ctrl->dbglvl; - IFSET(ctrl->dbglvl, DBG_REFINE, ctrl->dbglvl -= DBG_REFINE); - IFSET(ctrl->dbglvl, DBG_MOVEINFO, ctrl->dbglvl -= DBG_MOVEINFO); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, ctrl->dbglvl -= METIS_DBG_REFINE); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, ctrl->dbglvl -= METIS_DBG_MOVEINFO); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->InitPartTmr)); + + /* this is required for the cut-based part of the refinement */ + Setup2WayBalMultipliers(ctrl, graph, ntpwgts); - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); + switch (ctrl->iptype) { + case METIS_IPTYPE_EDGE: + if (graph->nedges == 0) + RandomBisection(ctrl, graph, ntpwgts, niparts); + else + GrowBisection(ctrl, graph, ntpwgts, niparts); + + Compute2WayPartitionParams(ctrl, graph); + ConstructSeparator(ctrl, graph); + break; + + case METIS_IPTYPE_NODE: + GrowBisectionNode(ctrl, graph, ntpwgts, niparts); + break; - GrowBisectionNode(ctrl, graph, ubfactor); - Compute2WayNodePartitionParams(ctrl, graph); + default: + gk_errexit(SIGERR, "Unknown iptype of %"PRIDX"\n", ctrl->iptype); + } - IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial Sep: %d\n", graph->mincut)); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_IPART, printf("Initial Sep: %"PRIDX"\n", graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->InitPartTmr)); ctrl->dbglvl = dbglvl; } +/*************************************************************************/ +/*! This function computes a bisection of a graph by randomly assigning + the vertices followed by a bisection refinement. + The resulting partition is returned in graph->where. 
+*/ +/*************************************************************************/ +void RandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) +{ + idx_t i, ii, j, k, nvtxs, pwgts[2], zeromaxpwgt, from, me, + bestcut=0, icut, mincut, inbfs; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where; + idx_t *perm, *bestwhere; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + Allocate2WayPartitionMemory(ctrl, graph); + where = graph->where; + + bestwhere = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + + zeromaxpwgt = ctrl->ubfactors[0]*graph->tvwgt[0]*ntpwgts[0]; + + for (inbfs=0; inbfs 0) { + irandArrayPermute(nvtxs, perm, nvtxs/2, 1); + pwgts[1] = graph->tvwgt[0]; + pwgts[0] = 0; + + for (ii=0; ii zeromaxpwgt) + break; + } + } + } + + /* Do some partition refinement */ + Compute2WayPartitionParams(ctrl, graph); + /* printf("IPART: %3"PRIDX" [%5"PRIDX" %5"PRIDX"] [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->nvtxs, pwgts[0], pwgts[1], graph->pwgts[0], graph->pwgts[1], graph->mincut); */ -/************************************************************************* -* This function takes a graph and produces a bisection by using a region -* growing algorithm. 
The resulting partition is returned in -* graph->where -**************************************************************************/ -void GrowBisection(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor) + Balance2Way(ctrl, graph, ntpwgts); + /* printf("BPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0], graph->pwgts[1], graph->mincut); */ + + FM_2WayRefine(ctrl, graph, ntpwgts, 4); + /* printf("RPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0], graph->pwgts[1], graph->mincut); */ + + if (inbfs==0 || bestcut > graph->mincut) { + bestcut = graph->mincut; + icopy(nvtxs, where, bestwhere); + if (bestcut == 0) + break; + } + } + + graph->mincut = bestcut; + icopy(nvtxs, bestwhere, where); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function takes a graph and produces a bisection by using a region + growing algorithm. The resulting bisection is refined using FM. + The resulting partition is returned in graph->where. 
+*/ +/*************************************************************************/ +void GrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) { - int i, j, k, nvtxs, drain, nleft, first, last, pwgts[2], minpwgt[2], maxpwgt[2], from, bestcut, icut, mincut, me, pass, nbfs; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where; - idxtype *queue, *touched, *gain, *bestwhere; + idx_t i, j, k, nvtxs, drain, nleft, first, last, + pwgts[2], oneminpwgt, onemaxpwgt, + from, me, bestcut=0, icut, mincut, inbfs; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where; + idx_t *queue, *touched, *gain, *bestwhere; + WCOREPUSH; - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; Allocate2WayPartitionMemory(ctrl, graph); where = graph->where; - bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere"); - queue = idxmalloc(nvtxs, "BisectGraph: queue"); - touched = idxmalloc(nvtxs, "BisectGraph: touched"); + bestwhere = iwspacemalloc(ctrl, nvtxs); + queue = iwspacemalloc(ctrl, nvtxs); + touched = iwspacemalloc(ctrl, nvtxs); - ASSERTP(tpwgts[0]+tpwgts[1] == idxsum(nvtxs, vwgt), ("%d %d\n", tpwgts[0]+tpwgts[1], idxsum(nvtxs, vwgt))); + onemaxpwgt = ctrl->ubfactors[0]*graph->tvwgt[0]*ntpwgts[1]; + oneminpwgt = (1.0/ctrl->ubfactors[0])*graph->tvwgt[0]*ntpwgts[1]; - maxpwgt[0] = ubfactor*tpwgts[0]; - maxpwgt[1] = ubfactor*tpwgts[1]; - minpwgt[0] = (1.0/ubfactor)*tpwgts[0]; - minpwgt[1] = (1.0/ubfactor)*tpwgts[1]; + for (inbfs=0; inbfsCoarsenTo ? 
SMALLNIPARTS : LARGENIPARTS); - bestcut = idxsum(nvtxs, graph->adjwgtsum)+1; /* The +1 is for the 0 edges case */ - for (; nbfs>0; nbfs--) { - idxset(nvtxs, 0, touched); + iset(nvtxs, 0, touched); - pwgts[1] = tpwgts[0]+tpwgts[1]; + pwgts[1] = graph->tvwgt[0]; pwgts[0] = 0; - idxset(nvtxs, 1, where); - queue[0] = RandomInRange(nvtxs); + queue[0] = irandInRange(nvtxs); touched[queue[0]] = 1; - first = 0; last = 1; + first = 0; + last = 1; nleft = nvtxs-1; drain = 0; @@ -131,7 +235,7 @@ void GrowBisection(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor if (nleft == 0 || drain) break; - k = RandomInRange(nleft); + k = irandInRange(nleft); for (i=0; i 0 && pwgts[1]-vwgt[i] < minpwgt[1]) { + if (pwgts[0] > 0 && pwgts[1]-vwgt[i] < oneminpwgt) { drain = 1; continue; } where[i] = 0; INC_DEC(pwgts[0], pwgts[1], vwgt[i]); - if (pwgts[1] <= maxpwgt[1]) + if (pwgts[1] <= onemaxpwgt) break; drain = 0; @@ -170,142 +275,251 @@ void GrowBisection(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor } /* Check to see if we hit any bad limiting cases */ - if (pwgts[1] == 0) { - i = RandomInRange(nvtxs); - where[i] = 1; - INC_DEC(pwgts[1], pwgts[0], vwgt[i]); - } + if (pwgts[1] == 0) + where[irandInRange(nvtxs)] = 1; + if (pwgts[0] == 0) + where[irandInRange(nvtxs)] = 0; /************************************************************* * Do some partition refinement **************************************************************/ Compute2WayPartitionParams(ctrl, graph); - /*printf("IPART: %3d [%5d %5d] [%5d %5d] %5d\n", graph->nvtxs, pwgts[0], pwgts[1], graph->pwgts[0], graph->pwgts[1], graph->mincut); */ + /* + printf("IPART: %3"PRIDX" [%5"PRIDX" %5"PRIDX"] [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", + graph->nvtxs, pwgts[0], pwgts[1], graph->pwgts[0], graph->pwgts[1], graph->mincut); + */ + + Balance2Way(ctrl, graph, ntpwgts); + /* + printf("BPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0], + graph->pwgts[1], graph->mincut); + */ + + FM_2WayRefine(ctrl, graph, 
ntpwgts, ctrl->niter); + /* + printf("RPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0], + graph->pwgts[1], graph->mincut); + */ + + if (inbfs == 0 || bestcut > graph->mincut) { + bestcut = graph->mincut; + icopy(nvtxs, where, bestwhere); + if (bestcut == 0) + break; + } + } + + graph->mincut = bestcut; + icopy(nvtxs, bestwhere, where); - Balance2Way(ctrl, graph, tpwgts, ubfactor); - /*printf("BPART: [%5d %5d] %5d\n", graph->pwgts[0], graph->pwgts[1], graph->mincut);*/ + WCOREPOP; +} - FM_2WayEdgeRefine(ctrl, graph, tpwgts, 4); - /*printf("RPART: [%5d %5d] %5d\n", graph->pwgts[0], graph->pwgts[1], graph->mincut);*/ - if (bestcut > graph->mincut) { +/*************************************************************************/ +/*! This function takes a multi-constraint graph and computes a bisection + by randomly assigning the vertices and then refining it. The resulting + partition is returned in graph->where. +*/ +/**************************************************************************/ +void McRandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) +{ + idx_t i, ii, j, k, nvtxs, ncon, from, bestcut=0, mincut, inbfs, qnum; + idx_t *bestwhere, *where, *perm, *counts; + idx_t *vwgt; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + vwgt = graph->vwgt; + + Allocate2WayPartitionMemory(ctrl, graph); + where = graph->where; + + bestwhere = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + counts = iwspacemalloc(ctrl, ncon); + + for (inbfs=0; inbfs<2*niparts; inbfs++) { + irandArrayPermute(nvtxs, perm, nvtxs/2, 1); + iset(ncon, 0, counts); + + /* partition by splitting the queues randomly */ + for (ii=0; iiniter); + Balance2Way(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + Balance2Way(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + + if (inbfs == 0 || bestcut >= graph->mincut) { bestcut = graph->mincut; - idxcopy(nvtxs, where, bestwhere); + 
icopy(nvtxs, where, bestwhere); if (bestcut == 0) break; } } graph->mincut = bestcut; - idxcopy(nvtxs, bestwhere, where); + icopy(nvtxs, bestwhere, where); - GKfree((void **)(&bestwhere), &queue, &touched, LTERM); + WCOREPOP; } +/*************************************************************************/ +/*! This function takes a multi-constraint graph and produces a bisection + by using a region growing algorithm. The resulting partition is + returned in graph->where. +*/ +/*************************************************************************/ +void McGrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) +{ + idx_t i, j, k, nvtxs, ncon, from, bestcut=0, mincut, inbfs; + idx_t *bestwhere, *where; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + + Allocate2WayPartitionMemory(ctrl, graph); + where = graph->where; + + bestwhere = iwspacemalloc(ctrl, nvtxs); + for (inbfs=0; inbfs<2*niparts; inbfs++) { + iset(nvtxs, 1, where); + where[irandInRange(nvtxs)] = 0; -/************************************************************************* -* This function takes a graph and produces a bisection by using a region -* growing algorithm. 
The resulting partition is returned in -* graph->where -**************************************************************************/ -void GrowBisectionNode(CtrlType *ctrl, GraphType *graph, float ubfactor) + Compute2WayPartitionParams(ctrl, graph); + + Balance2Way(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + Balance2Way(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + + if (inbfs == 0 || bestcut >= graph->mincut) { + bestcut = graph->mincut; + icopy(nvtxs, where, bestwhere); + if (bestcut == 0) + break; + } + } + + graph->mincut = bestcut; + icopy(nvtxs, bestwhere, where); + + WCOREPOP; +} + + +/*************************************************************************/ +/* This function takes a graph and produces a tri-section into left, right, + and separator using a region growing algorithm. The resulting separator + is refined using node FM. + The resulting partition is returned in graph->where. +*/ +/**************************************************************************/ +void GrowBisectionNode(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) { - int i, j, k, nvtxs, drain, nleft, first, last, pwgts[2], tpwgts[2], minpwgt[2], maxpwgt[2], from, bestcut, icut, mincut, me, pass, nbfs; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where, *bndind; - idxtype *queue, *touched, *gain, *bestwhere; + idx_t i, j, k, nvtxs, drain, nleft, first, last, pwgts[2], oneminpwgt, + onemaxpwgt, from, me, bestcut=0, icut, mincut, inbfs; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *bndind; + idx_t *queue, *touched, *gain, *bestwhere; - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere"); - queue = idxmalloc(nvtxs, "BisectGraph: queue"); - touched = idxmalloc(nvtxs, "BisectGraph: touched"); - - tpwgts[0] = 
idxsum(nvtxs, vwgt); - tpwgts[1] = tpwgts[0]/2; - tpwgts[0] -= tpwgts[1]; - - maxpwgt[0] = ubfactor*tpwgts[0]; - maxpwgt[1] = ubfactor*tpwgts[1]; - minpwgt[0] = (1.0/ubfactor)*tpwgts[0]; - minpwgt[1] = (1.0/ubfactor)*tpwgts[1]; - - /* Allocate memory for graph->rdata. Allocate sufficient memory for both edge and node */ - graph->rdata = idxmalloc(5*nvtxs+3, "GrowBisectionNode: graph->rdata"); - graph->pwgts = graph->rdata; - graph->where = graph->rdata + 3; - graph->bndptr = graph->rdata + nvtxs + 3; - graph->bndind = graph->rdata + 2*nvtxs + 3; - graph->nrinfo = (NRInfoType *)(graph->rdata + 3*nvtxs + 3); - graph->id = graph->rdata + 3*nvtxs + 3; - graph->ed = graph->rdata + 4*nvtxs + 3; + bestwhere = iwspacemalloc(ctrl, nvtxs); + queue = iwspacemalloc(ctrl, nvtxs); + touched = iwspacemalloc(ctrl, nvtxs); + + onemaxpwgt = ctrl->ubfactors[0]*graph->tvwgt[0]*0.5; + oneminpwgt = (1.0/ctrl->ubfactors[0])*graph->tvwgt[0]*0.5; + + + /* Allocate refinement memory. Allocate sufficient memory for both edge and node */ + graph->pwgts = imalloc(3, "GrowBisectionNode: pwgts"); + graph->where = imalloc(nvtxs, "GrowBisectionNode: where"); + graph->bndptr = imalloc(nvtxs, "GrowBisectionNode: bndptr"); + graph->bndind = imalloc(nvtxs, "GrowBisectionNode: bndind"); + graph->id = imalloc(nvtxs, "GrowBisectionNode: id"); + graph->ed = imalloc(nvtxs, "GrowBisectionNode: ed"); + graph->nrinfo = (nrinfo_t *)gk_malloc(nvtxs*sizeof(nrinfo_t), "GrowBisectionNode: nrinfo"); - where = graph->where; + where = graph->where; bndind = graph->bndind; - nbfs = (nvtxs <= ctrl->CoarsenTo ? 
SMALLNIPARTS : LARGENIPARTS); - bestcut = tpwgts[0]+tpwgts[1]; - for (nbfs++; nbfs>0; nbfs--) { - idxset(nvtxs, 0, touched); + for (inbfs=0; inbfstvwgt[0]; pwgts[0] = 0; - idxset(nvtxs, 1, where); - - queue[0] = RandomInRange(nvtxs); + queue[0] = irandInRange(nvtxs); touched[queue[0]] = 1; first = 0; last = 1; nleft = nvtxs-1; drain = 0; /* Start the BFS from queue to get a partition */ - if (nbfs >= 1) { - for (;;) { - if (first == last) { /* Empty. Disconnected graph! */ - if (nleft == 0 || drain) - break; + for (;;) { + if (first == last) { /* Empty. Disconnected graph! */ + if (nleft == 0 || drain) + break; - k = RandomInRange(nleft); - for (i=0; inbnd; i++) - where[bndind[i]] = 2; + for (i=0; inbnd; i++) { + j = bndind[i]; + if (xadj[j+1]-xadj[j] > 0) /* ignore islands */ + where[j] = 2; + } Compute2WayNodePartitionParams(ctrl, graph); - FM_2WayNodeRefine(ctrl, graph, ubfactor, 6); - - /* printf("ISep: [%d %d %d] %d\n", graph->pwgts[0], graph->pwgts[1], graph->pwgts[2], bestcut); */ - - if (bestcut > graph->mincut) { + FM_2WayNodeRefine2Sided(ctrl, graph, 1); + FM_2WayNodeRefine1Sided(ctrl, graph, 4); + + /* + printf("ISep: [%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"] %"PRIDX"\n", + inbfs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2], bestcut); + */ + + if (inbfs == 0 || bestcut > graph->mincut) { bestcut = graph->mincut; - idxcopy(nvtxs, where, bestwhere); + icopy(nvtxs, where, bestwhere); } } graph->mincut = bestcut; - idxcopy(nvtxs, bestwhere, where); - - Compute2WayNodePartitionParams(ctrl, graph); + icopy(nvtxs, bestwhere, where); - GKfree((void **)(&bestwhere), &queue, &touched, LTERM); + WCOREPOP; } -/************************************************************************* -* This function takes a graph and produces a bisection by using a region -* growing algorithm. 
The resulting partition is returned in -* graph->where -**************************************************************************/ -void RandomBisection(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor) +/*************************************************************************/ +/* This function takes a graph and produces a tri-section into left, right, + and separator using a region growing algorithm. The resulting separator + is refined using node FM. + The resulting partition is returned in graph->where. +*/ +/**************************************************************************/ +void GrowBisectionNode2(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) { - int i, ii, j, k, nvtxs, pwgts[2], minpwgt[2], maxpwgt[2], from, bestcut, icut, mincut, me, pass, nbfs; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where; - idxtype *perm, *bestwhere; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - Allocate2WayPartitionMemory(ctrl, graph); - where = graph->where; - - bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere"); - perm = idxmalloc(nvtxs, "BisectGraph: queue"); - - ASSERTP(tpwgts[0]+tpwgts[1] == idxsum(nvtxs, vwgt), ("%d %d\n", tpwgts[0]+tpwgts[1], idxsum(nvtxs, vwgt))); - - maxpwgt[0] = ubfactor*tpwgts[0]; - maxpwgt[1] = ubfactor*tpwgts[1]; - minpwgt[0] = (1.0/ubfactor)*tpwgts[0]; - minpwgt[1] = (1.0/ubfactor)*tpwgts[1]; + idx_t i, j, k, nvtxs, bestcut=0, mincut, inbfs; + idx_t *xadj, *where, *bndind, *bestwhere; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + + /* Allocate refinement memory. 
Allocate sufficient memory for both edge and node */ + graph->pwgts = imalloc(3, "GrowBisectionNode: pwgts"); + graph->where = imalloc(nvtxs, "GrowBisectionNode: where"); + graph->bndptr = imalloc(nvtxs, "GrowBisectionNode: bndptr"); + graph->bndind = imalloc(nvtxs, "GrowBisectionNode: bndind"); + graph->id = imalloc(nvtxs, "GrowBisectionNode: id"); + graph->ed = imalloc(nvtxs, "GrowBisectionNode: ed"); + graph->nrinfo = (nrinfo_t *)gk_malloc(nvtxs*sizeof(nrinfo_t), "GrowBisectionNode: nrinfo"); + + bestwhere = iwspacemalloc(ctrl, nvtxs); - nbfs = (nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS); - bestcut = idxsum(nvtxs, graph->adjwgtsum)+1; /* The +1 is for the 0 edges case */ - for (; nbfs>0; nbfs--) { - RandomPermute(nvtxs, perm, 1); + where = graph->where; + bndind = graph->bndind; - idxset(nvtxs, 1, where); - pwgts[1] = tpwgts[0]+tpwgts[1]; - pwgts[0] = 0; + for (inbfs=0; inbfs 0) + where[irandInRange(nvtxs)] = 0; + Compute2WayPartitionParams(ctrl, graph); + General2WayBalance(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); - if (nbfs != 1) { - for (ii=0; ii minpwgt[0]) - break; - } - } + /* Construct and refine the vertex separator */ + for (i=0; inbnd; i++) { + j = bndind[i]; + if (xadj[j+1]-xadj[j] > 0) /* ignore islands */ + where[j] = 2; } - /************************************************************* - * Do some partition refinement - **************************************************************/ - Compute2WayPartitionParams(ctrl, graph); - /* printf("IPART: %3d [%5d %5d] [%5d %5d] %5d\n", graph->nvtxs, pwgts[0], pwgts[1], graph->pwgts[0], graph->pwgts[1], graph->mincut); */ - - Balance2Way(ctrl, graph, tpwgts, ubfactor); - /* printf("BPART: [%5d %5d] %5d\n", graph->pwgts[0], graph->pwgts[1], graph->mincut); */ + Compute2WayNodePartitionParams(ctrl, graph); + FM_2WayNodeRefine2Sided(ctrl, graph, 4); - FM_2WayEdgeRefine(ctrl, graph, tpwgts, 4); - /* printf("RPART: [%5d %5d] %5d\n", graph->pwgts[0], graph->pwgts[1], 
graph->mincut); */ + /* + printf("ISep: [%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"] %"PRIDX"\n", + inbfs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2], bestcut); + */ - if (bestcut > graph->mincut) { + if (inbfs == 0 || bestcut > graph->mincut) { bestcut = graph->mincut; - idxcopy(nvtxs, where, bestwhere); - if (bestcut == 0) - break; + icopy(nvtxs, where, bestwhere); } } graph->mincut = bestcut; - idxcopy(nvtxs, bestwhere, where); + icopy(nvtxs, bestwhere, where); - GKfree((void **)(&bestwhere), &perm, LTERM); + WCOREPOP; } - - - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kmetis.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kmetis.c index c5dbd56f..536d0a4c 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kmetis.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kmetis.c @@ -1,128 +1,562 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * kmetis.c - * - * This file contains the top level routines for the multilevel k-way partitioning - * algorithm KMETIS. - * - * Started 7/28/97 - * George - * - * $Id: kmetis.c,v 1.1 2003/07/16 15:55:04 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function is the entry point for KMETIS -**************************************************************************/ -void METIS_PartGraphKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, - int *options, int *edgecut, idxtype *part) +/*! +\file +\brief The top-level routines for multilevel k-way partitioning that minimizes + the edge cut. 
+ +\date Started 7/28/1997 +\author George +\author Copyright 1997-2011, Regents of the University of Minnesota +\version\verbatim $Id: kmetis.c 20398 2016-11-22 17:17:12Z karypis $ \endverbatim +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function is the entry point for MCKMETIS */ +/*************************************************************************/ +int METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *objval, + idx_t *part) { - int i; - float *tpwgts; + int sigrval=0, renumber=0; + graph_t *graph; + ctrl_t *ctrl; + + /* set up malloc cleaning code and signal catchers */ + if (!gk_malloc_init()) + return METIS_ERROR_MEMORY; + + gk_sigtrap(); + + if ((sigrval = gk_sigcatch()) != 0) + goto SIGTHROW; + + /* set up the run parameters */ + ctrl = SetupCtrl(METIS_OP_KMETIS, options, *ncon, *nparts, tpwgts, ubvec); + if (!ctrl) { + gk_siguntrap(); + return METIS_ERROR_INPUT; + } + + /* if required, change the numbering to 0 */ + if (ctrl->numflag == 1) { + Change2CNumbering(*nvtxs, xadj, adjncy); + renumber = 1; + } + + /* set up the graph */ + graph = SetupGraph(ctrl, *nvtxs, *ncon, xadj, adjncy, vwgt, vsize, adjwgt); + + /* set up multipliers for making balance computations easier */ + SetupKWayBalMultipliers(ctrl, graph); + + /* set various run parameters that depend on the graph */ + ctrl->CoarsenTo = gk_max((*nvtxs)/(40*gk_log2(*nparts)), 30*(*nparts)); + ctrl->nIparts = (ctrl->nIparts != -1 ? ctrl->nIparts : (ctrl->CoarsenTo == 30*(*nparts) ? 
4 : 5)); - tpwgts = fmalloc(*nparts, "KMETIS: tpwgts"); - for (i=0; i<*nparts; i++) - tpwgts[i] = 1.0/(1.0*(*nparts)); + /* take care contiguity requests for disconnected graphs */ + if (ctrl->contig && !IsConnected(graph, 0)) + gk_errexit(SIGERR, "METIS Error: A contiguous partition is requested for a non-contiguous input graph.\n"); + + /* allocate workspace memory */ + AllocateWorkSpace(ctrl, graph); - METIS_WPartGraphKway(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, - tpwgts, options, edgecut, part); - GKfree((void **)&tpwgts, LTERM); + /* start the partitioning */ + IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr)); + + iset(*nvtxs, 0, part); + if (ctrl->dbglvl&512) + *objval = (*nparts == 1 ? 0 : BlockKWayPartitioning(ctrl, graph, part)); + else + *objval = (*nparts == 1 ? 0 : MlevelKWayPartitioning(ctrl, graph, part)); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl)); + + /* clean up */ + FreeCtrl(&ctrl); + +SIGTHROW: + /* if required, change the numbering back to 1 */ + if (renumber) + Change2FNumbering(*nvtxs, xadj, adjncy, part); + + gk_siguntrap(); + gk_malloc_cleanup(0); + + return metis_rcode(sigrval); } -/************************************************************************* -* This function is the entry point for KWMETIS -**************************************************************************/ -void METIS_WPartGraphKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, - float *tpwgts, int *options, int *edgecut, idxtype *part) +/*************************************************************************/ +/*! This function computes a k-way partitioning of a graph that minimizes + the specified objective function. 
+ + \param ctrl is the control structure + \param graph is the graph to be partitioned + \param part is the vector that on return will store the partitioning + + \returns the objective value of the partitioning. The partitioning + itself is stored in the part vector. +*/ +/*************************************************************************/ +idx_t MlevelKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part) { - int i, j; - GraphType graph; - CtrlType ctrl; + idx_t i, j, objval=0, curobj=0, bestobj=0; + real_t curbal=0.0, bestbal=0.0; + graph_t *cgraph; + int status; - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - SetUpGraph(&graph, OP_KMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, *wgtflag); + for (i=0; incuts; i++) { + cgraph = CoarsenGraph(ctrl, graph); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->InitPartTmr)); + AllocateKWayPartitionMemory(ctrl, cgraph); + + /* Release the work space */ + FreeWorkSpace(ctrl); + + /* Compute the initial partitioning */ + InitKWayPartitioning(ctrl, cgraph); + + /* Re-allocate the work space */ + AllocateWorkSpace(ctrl, graph); + AllocateRefinementWorkSpace(ctrl, graph->nedges, 2*cgraph->nedges); - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = KMETIS_CTYPE; - ctrl.IType = KMETIS_ITYPE; - ctrl.RType = KMETIS_RTYPE; - ctrl.dbglvl = KMETIS_DBGLVL; + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->InitPartTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_IPART, + printf("Initial %"PRIDX"-way partitioning cut: %"PRIDX"\n", ctrl->nparts, objval)); + + RefineKWay(ctrl, graph, cgraph); + + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + curobj = graph->mincut; + break; + + case METIS_OBJTYPE_VOL: + curobj = graph->minvol; + break; + + default: + gk_errexit(SIGERR, "Unknown objtype: %d\n", ctrl->objtype); + } + + curbal = ComputeLoadImbalanceDiff(graph, ctrl->nparts, ctrl->pijbm, ctrl->ubfactors); + + if (i == 0 + || (curbal <= 0.0005 && bestobj > curobj) + || 
(bestbal > 0.0005 && curbal < bestbal)) { + icopy(graph->nvtxs, graph->where, part); + bestobj = curobj; + bestbal = curbal; + } + + FreeRData(graph); + + if (bestobj == 0) + break; } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; + + FreeGraph(&graph); + + return bestobj; +} + + +/*************************************************************************/ +/*! This function computes the initial k-way partitioning using PMETIS +*/ +/*************************************************************************/ +void InitKWayPartitioning(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ntrials, options[METIS_NOPTIONS], curobj=0, bestobj=0; + idx_t *bestwhere=NULL; + real_t *ubvec=NULL; + int status; + + METIS_SetDefaultOptions(options); + //options[METIS_OPTION_NITER] = 10; + options[METIS_OPTION_NITER] = ctrl->niter; + options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; + options[METIS_OPTION_NO2HOP] = ctrl->no2hop; + options[METIS_OPTION_ONDISK] = ctrl->ondisk; + options[METIS_OPTION_DROPEDGES] = ctrl->dropedges; + //options[METIS_OPTION_DBGLVL] = ctrl->dbglvl; + + ubvec = rmalloc(graph->ncon, "InitKWayPartitioning: ubvec"); + for (i=0; incon; i++) + ubvec[i] = (real_t)pow(ctrl->ubfactors[i], 1.0/log(ctrl->nparts)); + + + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + case METIS_OBJTYPE_VOL: + options[METIS_OPTION_NCUTS] = ctrl->nIparts; + status = METIS_PartGraphRecursive(&graph->nvtxs, &graph->ncon, + graph->xadj, graph->adjncy, graph->vwgt, graph->vsize, + graph->adjwgt, &ctrl->nparts, ctrl->tpwgts, ubvec, + options, &curobj, graph->where); + + if (status != METIS_OK) + gk_errexit(SIGERR, "Failed during initial partitioning\n"); + + break; + +#ifdef XXX /* This does not seem to help */ + case METIS_OBJTYPE_VOL: + bestwhere = imalloc(graph->nvtxs, "InitKWayPartitioning: bestwhere"); + options[METIS_OPTION_NCUTS] = 2; + + ntrials = (ctrl->nIparts+1)/2; + 
for (i=0; invtxs, &graph->ncon, + graph->xadj, graph->adjncy, graph->vwgt, graph->vsize, + graph->adjwgt, &ctrl->nparts, ctrl->tpwgts, ubvec, + options, &curobj, graph->where); + if (status != METIS_OK) + gk_errexit(SIGERR, "Failed during initial partitioning\n"); + + curobj = ComputeVolume(graph, graph->where); + + if (i == 0 || bestobj > curobj) { + bestobj = curobj; + if (i < ntrials-1) + icopy(graph->nvtxs, graph->where, bestwhere); + } + + if (bestobj == 0) + break; + } + if (bestobj != curobj) + icopy(graph->nvtxs, bestwhere, graph->where); + + break; +#endif + + default: + gk_errexit(SIGERR, "Unknown objtype: %d\n", ctrl->objtype); } - ctrl.optype = OP_KMETIS; - ctrl.CoarsenTo = amax((*nvtxs)/(40*log2Int(*nparts)), 20*(*nparts)); - ctrl.maxvwgt = 1.5*((graph.vwgt ? idxsum(*nvtxs, graph.vwgt) : (*nvtxs))/ctrl.CoarsenTo); - InitRandom(-1); + gk_free((void **)&ubvec, &bestwhere, LTERM); + +} + + +/*************************************************************************/ +/*! This function computes a k-way partitioning of a graph that minimizes + the specified objective function. - AllocateWorkSpace(&ctrl, &graph, *nparts); + \param ctrl is the control structure + \param graph is the graph to be partitioned + \param part is the vector that on return will store the partitioning - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); + \returns the objective value of the partitioning. The partitioning + itself is stored in the part vector. 
+*/ +/*************************************************************************/ +idx_t BlockKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part) +{ + idx_t i, ii, j, nvtxs, objval=0; + idx_t *vwgt; + idx_t nparts, mynparts; + idx_t *fpwgts, *cpwgts, *fpart, *perm; + ipq_t *queue; - *edgecut = MlevelKWayPartitioning(&ctrl, &graph, *nparts, part, tpwgts, 1.03); + WCOREPUSH; - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); + nvtxs = graph->nvtxs; + vwgt = graph->vwgt; - FreeWorkSpace(&ctrl, &graph); + nparts = ctrl->nparts; - if (*numflag == 1) - Change2FNumbering(*nvtxs, xadj, adjncy, part); + mynparts = gk_min(100*nparts, sqrt(nvtxs)); + + for (i=0; iwhere. +*/ +/*************************************************************************/ +idx_t GrowMultisection(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *where) { - int i, j, nvtxs, tvwgt, tpwgts2[2]; - GraphType *cgraph; - int wgtflag=3, numflag=0, options[10], edgecut; + idx_t i, j, k, l, nvtxs, nleft, first, last; + idx_t *xadj, *vwgt, *adjncy; + idx_t *queue; + idx_t tvwgt, maxpwgt, *pwgts; - cgraph = Coarsen2Way(ctrl, graph); + WCOREPUSH; - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); - AllocateKWayPartitionMemory(ctrl, cgraph, nparts); + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->xadj; + adjncy = graph->adjncy; - options[0] = 1; - options[OPTION_CTYPE] = MATCH_SHEMKWAY; - options[OPTION_ITYPE] = IPART_GGPKL; - options[OPTION_RTYPE] = RTYPE_FM; - options[OPTION_DBGLVL] = 0; + queue = iwspacemalloc(ctrl, nvtxs); - METIS_WPartGraphRecursive(&cgraph->nvtxs, cgraph->xadj, cgraph->adjncy, cgraph->vwgt, - cgraph->adjwgt, &wgtflag, &numflag, &nparts, tpwgts, options, - &edgecut, cgraph->where); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); - IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial %d-way partitioning cut: %d\n", nparts, edgecut)); + /* Select the seeds for the nparts-way BFS */ + for 
(nleft=0, i=0; i 1) /* a seed's degree should be > 1 */ + where[nleft++] = i; + } + nparts = gk_min(nparts, nleft); + for (i=0; idbglvl, DBG_KWAYPINFO, ComputePartitionInfo(cgraph, nparts, cgraph->where)); + pwgts = iset(nparts, 0, iwspacemalloc(ctrl, nparts)); + tvwgt = isum(nvtxs, vwgt, 1); + maxpwgt = (1.5*tvwgt)/nparts; + + iset(nvtxs, -1, where); + for (i=0; invtxs, graph->where, part); - GKfree((void **)(&graph->gdata), &graph->rdata, LTERM); + /* Start the BFS from queue to get a partition */ + while (first < last) { + i = queue[first++]; + l = where[i]; + if (pwgts[l] > maxpwgt) + continue; + + for (j=xadj[i]; j maxpwgt) + break; + pwgts[l] += vwgt[k]; + where[k] = l; + queue[last++] = k; + nleft--; + } + } + } + + /* Assign the unassigned vertices randomly to the nparts partitions */ + if (nleft > 0) { + for (i=0; imincut; + WCOREPOP; + return nparts; } + +/*************************************************************************/ +/*! This function balances the partitioning using label propagation. 
+*/ +/*************************************************************************/ +void BalanceAndRefineLP(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *where) +{ + idx_t ii, i, j, k, u, v, nvtxs, iter; + idx_t *xadj, *vwgt, *adjncy, *adjwgt; + idx_t tvwgt, *pwgts, maxpwgt, minpwgt; + idx_t *perm; + idx_t from, to, nmoves, nnbrs, *nbrids, *nbrwgts, *nbrmrks; + real_t ubfactor; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + pwgts = iset(nparts, 0, iwspacemalloc(ctrl, nparts)); + + ubfactor = I2RUBFACTOR(ctrl->ufactor); + tvwgt = isum(nvtxs, vwgt, 1); + maxpwgt = (ubfactor*tvwgt)/nparts; + minpwgt = (1.0*tvwgt)/(ubfactor*nparts); + + for (i=0; idbglvl&METIS_DBG_REFINE) + printf("BLP: nparts: %"PRIDX", min-max: [%"PRIDX", %"PRIDX"], bal: %7.4"PRREAL", cut: %9"PRIDX"\n", + nparts, minpwgt, maxpwgt, 1.0*imax(nparts, pwgts, 1)*nparts/tvwgt, ComputeCut(graph, where)); + for (iter=0; iterniter; iter++) { + if (imax(nparts, pwgts, 1)*nparts < ubfactor*tvwgt) + break; + + irandArrayPermute(nvtxs, perm, nvtxs/8, 1); + nmoves = 0; + + for (ii=0; ii maxpwgt) + continue; /* skip if 'to' is overweight */ + + if ((k = nbrmrks[to]) == -1) { + nbrmrks[to] = k = nnbrs++; + nbrids[k] = to; + } + nbrwgts[k] += xadj[v+1]-xadj[v]; + } + if (nnbrs == 0) + continue; + + to = nbrids[iargmax(nnbrs, nbrwgts, 1)]; + if (from != to) { + where[u] = to; + INC_DEC(pwgts[to], pwgts[from], vwgt[u]); + nmoves++; + } + + for (k=0; kdbglvl&METIS_DBG_REFINE) + printf(" nmoves: %8"PRIDX", bal: %7.4"PRREAL", cut: %9"PRIDX"\n", + nmoves, 1.0*imax(nparts, pwgts, 1)*nparts/tvwgt, ComputeCut(graph, where)); + + if (nmoves == 0) + break; + } + + /* perform a fixed number of refinement LP iterations */ + if (ctrl->dbglvl&METIS_DBG_REFINE) + printf("RLP: nparts: %"PRIDX", min-max: [%"PRIDX", %"PRIDX"], bal: %7.4"PRREAL", cut: %9"PRIDX"\n", + nparts, minpwgt, maxpwgt, 1.0*imax(nparts, pwgts, 1)*nparts/tvwgt, 
ComputeCut(graph, where)); + for (iter=0; iterniter; iter++) { + irandArrayPermute(nvtxs, perm, nvtxs/8, 1); + nmoves = 0; + + for (ii=0; ii maxpwgt) + continue; /* skip if 'to' is overweight */ + + if ((k = nbrmrks[to]) == -1) { + nbrmrks[to] = k = nnbrs++; + nbrids[k] = to; + } + nbrwgts[k] += adjwgt[j]; + } + if (nnbrs == 0) + continue; + + to = nbrids[iargmax(nnbrs, nbrwgts, 1)]; + if (from != to) { + where[u] = to; + INC_DEC(pwgts[to], pwgts[from], vwgt[u]); + nmoves++; + } + + for (k=0; kdbglvl&METIS_DBG_REFINE) + printf(" nmoves: %8"PRIDX", bal: %7.4"PRREAL", cut: %9"PRIDX"\n", + nmoves, 1.0*imax(nparts, pwgts, 1)*nparts/tvwgt, ComputeCut(graph, where)); + + if (nmoves == 0) + break; + } + + WCOREPOP; +} diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kvmetis.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kvmetis.c deleted file mode 100644 index e688a447..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kvmetis.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * kvmetis.c - * - * This file contains the top level routines for the multilevel k-way partitioning - * algorithm KMETIS. 
- * - * Started 7/28/97 - * George - * - * $Id: kvmetis.c,v 1.1 2003/07/16 15:55:04 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function is the entry point for KMETIS -**************************************************************************/ -void METIS_PartGraphVKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *vsize, int *wgtflag, int *numflag, int *nparts, - int *options, int *volume, idxtype *part) -{ - int i; - float *tpwgts; - - tpwgts = fmalloc(*nparts, "KMETIS: tpwgts"); - for (i=0; i<*nparts; i++) - tpwgts[i] = 1.0/(1.0*(*nparts)); - - METIS_WPartGraphVKway(nvtxs, xadj, adjncy, vwgt, vsize, wgtflag, numflag, nparts, - tpwgts, options, volume, part); - GKfree((void **)&tpwgts, LTERM); -} - - -/************************************************************************* -* This function is the entry point for KWMETIS -**************************************************************************/ -void METIS_WPartGraphVKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *vsize, int *wgtflag, int *numflag, int *nparts, - float *tpwgts, int *options, int *volume, idxtype *part) -{ - int i, j; - GraphType graph; - CtrlType ctrl; - - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - - VolSetUpGraph(&graph, OP_KVMETIS, *nvtxs, 1, xadj, adjncy, vwgt, vsize, *wgtflag); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = KVMETIS_CTYPE; - ctrl.IType = KVMETIS_ITYPE; - ctrl.RType = KVMETIS_RTYPE; - ctrl.dbglvl = KVMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - ctrl.optype = OP_KVMETIS; - ctrl.CoarsenTo = amax((*nvtxs)/(40*log2Int(*nparts)), 20*(*nparts)); - ctrl.maxvwgt = 1.5*((graph.vwgt ? 
idxsum(*nvtxs, graph.vwgt) : (*nvtxs))/ctrl.CoarsenTo); - - InitRandom(-1); - - AllocateWorkSpace(&ctrl, &graph, *nparts); - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - *volume = MlevelVolKWayPartitioning(&ctrl, &graph, *nparts, part, tpwgts, 1.03); - - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); - - FreeWorkSpace(&ctrl, &graph); - - if (*numflag == 1) - Change2FNumbering(*nvtxs, xadj, adjncy, part); -} - - -/************************************************************************* -* This function takes a graph and produces a bisection of it -**************************************************************************/ -int MlevelVolKWayPartitioning(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, - float *tpwgts, float ubfactor) -{ - int i, j, nvtxs, tvwgt, tpwgts2[2]; - GraphType *cgraph; - int wgtflag=3, numflag=0, options[10], edgecut; - - cgraph = Coarsen2Way(ctrl, graph); - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); - AllocateVolKWayPartitionMemory(ctrl, cgraph, nparts); - - options[0] = 1; - options[OPTION_CTYPE] = MATCH_SHEMKWAY; - options[OPTION_ITYPE] = IPART_GGPKL; - options[OPTION_RTYPE] = RTYPE_FM; - options[OPTION_DBGLVL] = 0; - - METIS_WPartGraphRecursive(&cgraph->nvtxs, cgraph->xadj, cgraph->adjncy, cgraph->vwgt, - cgraph->adjwgt, &wgtflag, &numflag, &nparts, tpwgts, options, - &edgecut, cgraph->where); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); - IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial %d-way partitioning cut: %d\n", nparts, edgecut)); - - IFSET(ctrl->dbglvl, DBG_KWAYPINFO, ComputePartitionInfo(cgraph, nparts, cgraph->where)); - - RefineVolKWay(ctrl, graph, cgraph, nparts, tpwgts, ubfactor); - - idxcopy(graph->nvtxs, graph->where, part); - - GKfree((void **)&graph->gdata, &graph->rdata, LTERM); - - return graph->minvol; - -} - diff --git 
a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayfm.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayfm.c index 170dcf30..7c493750 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayfm.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayfm.c @@ -1,466 +1,738 @@ -/* - * kwayfm.c - * - * This file contains code that implements the multilevel k-way refinement - * - * Started 7/28/97 - * George - * - * $Id: kwayfm.c,v 1.1 2003/07/16 15:55:04 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Random_KWayEdgeRefine(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor, int npasses, int ffactor) +/*! +\file +\brief Routines for k-way refinement + +\date Started 7/28/97 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version $Id: kwayfm.c 17513 2014-08-05 16:20:50Z dominique $ +*/ + +#include "metislib.h" + + + +/*************************************************************************/ +/* Top-level routine for k-way partitioning refinement. This routine just + calls the appropriate refinement routine based on the objectives and + constraints. 
*/ +/*************************************************************************/ +void Greedy_KWayOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) +{ + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + if (graph->ncon == 1) + Greedy_KWayCutOptimize(ctrl, graph, niter, ffactor, omode); + else + Greedy_McKWayCutOptimize(ctrl, graph, niter, ffactor, omode); + break; + + case METIS_OBJTYPE_VOL: + if (graph->ncon == 1) + Greedy_KWayVolOptimize(ctrl, graph, niter, ffactor, omode); + else + Greedy_McKWayVolOptimize(ctrl, graph, niter, ffactor, omode); + break; + + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); + } +} + + +/*************************************************************************/ +/*! K-way partitioning optimization in which the vertices are visited in + decreasing ed/sqrt(nnbrs)-id order. Note this is just an + approximation, as the ed is often split across different subdomains + and the sqrt(nnbrs) is just a crude approximation. + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. 
+ \param omode is the type of optimization that will performed among + OMODE_REFINE and OMODE_BALANCE + +*/ +/**************************************************************************/ +void Greedy_KWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) { - int i, ii, iii, j, jj, k, l, pass, nvtxs, nmoves, nbnd, tvwgt, myndegrees; - int from, me, to, oldcut, vwgt, gain; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *itpwgts; - EDegreeType *myedegrees; - RInfoType *myrinfo; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, pass, nvtxs, nparts, gain; + idx_t from, me, to, oldcut, vwgt; + idx_t *xadj, *adjncy, *adjwgt; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nmoved, nupd, *vstatus, *updptr, *updind; + idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL; + idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL; + idx_t bndtype = (omode == OMODE_REFINE ? 
BNDTYPE_REFINE : BNDTYPE_BALANCE); + real_t *tpwgts, ubfactor; + + /* Edgecut-specific/different variables */ + idx_t nbnd, oldnnbrs; + rpq_t *queue; + real_t rgain; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + ffactor = 0.0; + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - bndptr = graph->bndptr; bndind = graph->bndind; + bndptr = graph->bndptr; where = graph->where; pwgts = graph->pwgts; + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + /* Setup the weight intervals of the various subdomains */ - minwgt = idxwspacemalloc(ctrl, nparts); - maxwgt = idxwspacemalloc(ctrl, nparts); - itpwgts = idxwspacemalloc(ctrl, nparts); - tvwgt = idxsum(nparts, pwgts); - ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt)); + minpwgts = iwspacemalloc(ctrl, nparts); + maxpwgts = iwspacemalloc(ctrl, nparts); + + if (omode == OMODE_BALANCE) + ubfactor = ctrl->ubfactors[0]; + else + ubfactor = gk_max(ctrl->ubfactors[0], ComputeLoadImbalance(graph, nparts, ctrl->pijbm)); for (i=0; itvwgt[0]*ubfactor; + minpwgts[i] = tpwgts[i]*graph->tvwgt[0]*(1.0/ubfactor); + } + + perm = iwspacemalloc(ctrl, nvtxs); + + + /* This stores the valid target subdomains. It is used when ctrl->minconn to + control the subdomains to which moves are allowed to be made. + When ctrl->minconn is false, the default values of 2 allow all moves to + go through and it does not interfere with the zero-gain move selection. 
*/ + safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts)); + + if (ctrl->minconn) { + ComputeSubDomainGraph(ctrl, graph); + + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; + doms = iset(nparts, 0, ctrl->pvec1); + } + + + /* Setup updptr, updind like boundary info to keep track of the vertices whose + vstatus's need to be reset at the end of the inner iteration */ + vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs)); + updptr = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + updind = iwspacemalloc(ctrl, nvtxs); + + if (ctrl->contig) { + /* The arrays that will be used for limited check of articulation points */ + bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + bfsind = iwspacemalloc(ctrl, nvtxs); + bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); } - perm = idxwspacemalloc(ctrl, nvtxs); + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("%s: [%6"PRIDX" %6"PRIDX"]-[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"," + " Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %6"PRIDX, + (omode == OMODE_REFINE ? "GRC" : "GBC"), + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), minpwgts[0], maxpwgts[0], + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nvtxs, graph->nbnd, graph->mincut); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d]-[%6d %6d], Balance: %5.3f, Nv-Nb[%6d %6d]. Cut: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd, - graph->mincut)); + queue = rpqCreate(nvtxs); - for (pass=0; passmincut); + if (omode == OMODE_REFINE) + ASSERT(CheckBnd2(graph)); + + if (omode == OMODE_BALANCE) { + /* Check to see if things are out of balance, given the tolerance */ + for (i=0; i maxpwgts[i] || pwgts[i] < minpwgts[i]) + break; + } + if (i == nparts) /* Things are balanced. 
Return right away */ + break; + } oldcut = graph->mincut; - nbnd = graph->nbnd; + nbnd = graph->nbnd; + nupd = 0; - RandomPermute(nbnd, perm, 1); - for (nmoves=iii=0; iiinbnd; iii++) { - ii = perm[iii]; - if (ii >= nbnd) - continue; - i = bndind[ii]; + if (ctrl->minconn) + maxndoms = imax(nparts, nads,1); + + /* Insert the boundary vertices in the priority queue */ + irandArrayPermute(nbnd, perm, nbnd/4, 1); + for (ii=0; iickrinfo[i].nnbrs > 0 ? + 1.0*graph->ckrinfo[i].ed/sqrt(graph->ckrinfo[i].nnbrs) : 0.0) + - graph->ckrinfo[i].id; + rpqInsert(queue, i, rgain); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(nupd, updind, updptr, i); + } + + /* Start extracting vertices from the queue and try to move them */ + for (nmoved=0, iii=0;;iii++) { + if ((i = rpqGetTop(queue)) == -1) + break; + vstatus[i] = VPQSTATUS_EXTRACTED; - myrinfo = graph->rinfo+i; + myrinfo = graph->ckrinfo+i; + mynbrs = ctrl->cnbrpool + myrinfo->inbr; - if (myrinfo->ed >= myrinfo->id) { /* Total ED is too high */ - from = where[i]; - vwgt = graph->vwgt[i]; + from = where[i]; + vwgt = graph->vwgt[i]; - if (myrinfo->id > 0 && pwgts[from]-vwgt < minwgt[from]) - continue; /* This cannot be moved! */ +#ifdef XXX + /* Prevent moves that make 'from' domain underbalanced */ + if (omode == OMODE_REFINE) { + if (myrinfo->id > 0 && pwgts[from]-vwgt < minpwgts[from]) + continue; + } + else { /* OMODE_BALANCE */ + if (pwgts[from]-vwgt < minpwgts[from]) + continue; + } +#endif - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; + if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk)) + continue; - j = myrinfo->id; - for (k=0; kid. 
Allow good nodes to move */ - if (pwgts[to]+vwgt <= maxwgt[to]+ffactor*gain && gain >= 0) + if (ctrl->minconn) + SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms); + + /* Find the most promising subdomain to move to */ + if (omode == OMODE_REFINE) { + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + if (((mynbrs[k].ed > myrinfo->id) && + ((pwgts[from]-vwgt >= minpwgts[from]) || + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) && + ((pwgts[to]+vwgt <= maxpwgts[to]) || + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) + ) || + ((mynbrs[k].ed == myrinfo->id) && + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) + ) break; } - if (k == myndegrees) + if (k < 0) continue; /* break out if you did not find a candidate */ - for (j=k+1; j myedegrees[k].ed && pwgts[to]+vwgt <= maxwgt[to]) || - (myedegrees[j].ed == myedegrees[k].ed && - itpwgts[myedegrees[k].pid]*pwgts[to] < itpwgts[to]*pwgts[myedegrees[k].pid])) + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if (((mynbrs[j].ed > mynbrs[k].ed) && + ((pwgts[from]-vwgt >= minpwgts[from]) || + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) && + ((pwgts[to]+vwgt <= maxpwgts[to]) || + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) + ) || + ((mynbrs[j].ed == mynbrs[k].ed) && + (tpwgts[mynbrs[k].pid]*pwgts[to] < tpwgts[to]*pwgts[mynbrs[k].pid])) + ) k = j; } - to = myedegrees[k].pid; + to = mynbrs[k].pid; + + gain = mynbrs[k].ed-myrinfo->id; + /* + if (!(gain > 0 + || (gain == 0 + && (pwgts[from] >= maxpwgts[from] + || tpwgts[to]*pwgts[from] > tpwgts[from]*(pwgts[to]+vwgt) + || (iii%2 == 0 && safetos[to] == 2) + ) + ) + ) + ) + continue; + */ + } + else { /* OMODE_BALANCE */ + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + /* the correctness of the following test follows from the correctness + of the similar test in the subsequent loop */ + if (from 
>= nparts || tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt)) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ - j = 0; - if (myedegrees[k].ed-myrinfo->id > 0) - j = 1; - else if (myedegrees[k].ed-myrinfo->id == 0) { - if ((iii&7) == 0 || pwgts[from] >= maxwgt[from] || itpwgts[from]*(pwgts[to]+vwgt) < itpwgts[to]*pwgts[from]) - j = 1; + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if (tpwgts[mynbrs[k].pid]*pwgts[to] < tpwgts[to]*pwgts[mynbrs[k].pid]) + k = j; } - if (j == 0) - continue; - - /*===================================================================== - * If we got here, we can now move the vertex from 'from' to 'to' - *======================================================================*/ - graph->mincut -= myedegrees[k].ed-myrinfo->id; - - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut)); - - /* Update where, weight, and ID/ED information of the vertex you moved */ - where[i] = to; - INC_DEC(pwgts[to], pwgts[from], vwgt); - myrinfo->ed += myrinfo->id-myedegrees[k].ed; - SWAP(myrinfo->id, myedegrees[k].ed, j); - if (myedegrees[k].ed == 0) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].pid = from; - - if (myrinfo->ed-myrinfo->id < 0) - BNDDelete(nbnd, bndind, bndptr, i); - - /* Update the degrees of adjacent vertices */ - for (j=xadj[i]; jrinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; + to = mynbrs[k].pid; - ASSERT(CheckRInfo(myrinfo)); + //if (pwgts[from] < maxpwgts[from] && pwgts[to] > minpwgts[to] && + // mynbrs[k].ed-myrinfo->id < 0) + // continue; + } - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1) - BNDInsert(nbnd, bndind, bndptr, ii); - 
} - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); + /*===================================================================== + * If we got here, we can now move the vertex from 'from' to 'to' + *======================================================================*/ + graph->mincut -= mynbrs[k].ed-myrinfo->id; + nmoved++; - if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1) - BNDDelete(nbnd, bndind, bndptr, ii); - } + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX"/%"PRIDX" to %3"PRIDX"/%"PRIDX" [%6"PRIDX" %6"PRIDX"]. Gain: %4"PRIDX". Cut: %6"PRIDX"\n", + i, from, safetos[from], to, safetos[to], pwgts[from], pwgts[to], mynbrs[k].ed-myrinfo->id, graph->mincut)); - /* Remove contribution from the .ed of 'from' */ - if (me != from) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == from) { - if (myedegrees[k].ed == adjwgt[j]) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].ed -= adjwgt[j]; - break; - } - } - } + /* Update the subdomain connectivity information */ + if (ctrl->minconn) { + /* take care of i's move itself */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, &maxndoms); - /* Add contribution to the .ed of 'to' */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } + /* take care of the adjacent vertices */ + for (j=xadj[i]; jckrinfo+ii; - ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]); - ASSERT(CheckRInfo(myrinfo)); + oldnnbrs = myrinfo->nnbrs; - } - nmoves++; + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype); + + UpdateQueueInfo(queue, vstatus, ii, me, from, to, myrinfo, oldnnbrs, + nupd, updptr, updind, bndtype); + + ASSERT(myrinfo->nnbrs <= 
xadj[ii+1]-xadj[ii]); } + } graph->nbnd = nbnd; - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. Nmoves: %5d, Cut: %6d, Vol: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves, graph->mincut, ComputeVolume(graph, where))); + /* Reset the vstatus and associated data structures */ + for (i=0; idbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." + " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX, + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nbnd, nmoved, graph->mincut, ComputeVolume(graph, where)); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } - if (graph->mincut == oldcut) + if (nmoved == 0 || (omode == OMODE_REFINE && graph->mincut == oldcut)) break; } - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); -} - + rpqDestroy(queue); + WCOREPOP; +} +/*************************************************************************/ +/*! K-way refinement that minimizes the communication volume. This is a + greedy routine and the vertices are visited in decreasing gv order. + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. 
-/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Greedy_KWayEdgeRefine(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor, int npasses) +*/ +/**************************************************************************/ +void Greedy_KWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) { - int i, ii, iii, j, jj, k, l, pass, nvtxs, nbnd, tvwgt, myndegrees, oldgain, gain; - int from, me, to, oldcut, vwgt; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *moved, *itpwgts; - EDegreeType *myedegrees; - RInfoType *myrinfo; - PQueueType queue; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, pass, nvtxs, nparts, gain; + idx_t from, me, to, oldcut, vwgt; + idx_t *xadj, *adjncy; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nmoved, nupd, *vstatus, *updptr, *updind; + idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL; + idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL; + idx_t bndtype = (omode == OMODE_REFINE ? 
BNDTYPE_REFINE : BNDTYPE_BALANCE); + real_t *tpwgts; + + /* Volume-specific/different variables */ + ipq_t *queue; + idx_t oldvol, xgain; + idx_t *vmarker, *pmarker, *modind; + vkrinfo_t *myrinfo; + vnbr_t *mynbrs; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + xadj = graph->xadj; adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - bndind = graph->bndind; bndptr = graph->bndptr; - - where = graph->where; - pwgts = graph->pwgts; + bndind = graph->bndind; + where = graph->where; + pwgts = graph->pwgts; + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + /* Setup the weight intervals of the various subdomains */ - minwgt = idxwspacemalloc(ctrl, nparts); - maxwgt = idxwspacemalloc(ctrl, nparts); - itpwgts = idxwspacemalloc(ctrl, nparts); - tvwgt = idxsum(nparts, pwgts); - ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt)); + minpwgts = iwspacemalloc(ctrl, nparts); + maxpwgts = iwspacemalloc(ctrl, nparts); for (i=0; itpwgts[i]*graph->tvwgt[0]*ctrl->ubfactors[0]; + minpwgts[i] = ctrl->tpwgts[i]*graph->tvwgt[0]*(1.0/ctrl->ubfactors[0]); } - perm = idxwspacemalloc(ctrl, nvtxs); - moved = idxwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); - PQueueInit(ctrl, &queue, nvtxs, graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d]-[%6d %6d], Balance: %5.3f, Nv-Nb[%6d %6d]. Cut: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd, - graph->mincut)); + /* This stores the valid target subdomains. It is used when ctrl->minconn to + control the subdomains to which moves are allowed to be made. + When ctrl->minconn is false, the default values of 2 allow all moves to + go through and it does not interfere with the zero-gain move selection. 
*/ + safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts)); - for (pass=0; passmincut); + if (ctrl->minconn) { + ComputeSubDomainGraph(ctrl, graph); - PQueueReset(&queue); - idxset(nvtxs, -1, moved); + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; + doms = iset(nparts, 0, ctrl->pvec1); + } - oldcut = graph->mincut; - nbnd = graph->nbnd; - RandomPermute(nbnd, perm, 1); - for (ii=0; iirinfo[i].ed - graph->rinfo[i].id); - moved[i] = 2; - } + /* Setup updptr, updind like boundary info to keep track of the vertices whose + vstatus's need to be reset at the end of the inner iteration */ + vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs)); + updptr = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + updind = iwspacemalloc(ctrl, nvtxs); - for (iii=0;;iii++) { - if ((i = PQueueGetMax(&queue)) == -1) - break; - moved[i] = 1; + if (ctrl->contig) { + /* The arrays that will be used for limited check of articulation points */ + bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + bfsind = iwspacemalloc(ctrl, nvtxs); + bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + } - myrinfo = graph->rinfo+i; - from = where[i]; - vwgt = graph->vwgt[i]; + /* Vol-refinement specific working arrays */ + modind = iwspacemalloc(ctrl, nvtxs); + vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("%s: [%6"PRIDX" %6"PRIDX"]-[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL + ", Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %5"PRIDX", Vol: %5"PRIDX, + (omode == OMODE_REFINE ? 
"GRV" : "GBV"), + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), minpwgts[0], maxpwgts[0], + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nvtxs, graph->nbnd, graph->mincut, graph->minvol); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } - if (pwgts[from]-vwgt < minwgt[from]) - continue; /* This cannot be moved! */ + queue = ipqCreate(nvtxs); - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - j = myrinfo->id; - for (k=0; kid. Allow good nodes to move */ - if (pwgts[to]+vwgt <= maxwgt[to]+gain && gain >= 0) - break; - } - if (k == myndegrees) - continue; /* break out if you did not find a candidate */ + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; passminvol); - for (j=k+1; j myedegrees[k].ed && pwgts[to]+vwgt <= maxwgt[to]) || - (myedegrees[j].ed == myedegrees[k].ed && - itpwgts[myedegrees[k].pid]*pwgts[to] < itpwgts[to]*pwgts[myedegrees[k].pid])) - k = j; + if (omode == OMODE_BALANCE) { + /* Check to see if things are out of balance, given the tolerance */ + for (i=0; i maxpwgts[i]) + break; } + if (i == nparts) /* Things are balanced. 
Return right away */ + break; + } - to = myedegrees[k].pid; + oldcut = graph->mincut; + oldvol = graph->minvol; + nupd = 0; - j = 0; - if (myedegrees[k].ed-myrinfo->id > 0) - j = 1; - else if (myedegrees[k].ed-myrinfo->id == 0) { - if ((iii&7) == 0 || pwgts[from] >= maxwgt[from] || itpwgts[from]*(pwgts[to]+vwgt) < itpwgts[to]*pwgts[from]) - j = 1; - } - if (j == 0) - continue; - - /*===================================================================== - * If we got here, we can now move the vertex from 'from' to 'to' - *======================================================================*/ - graph->mincut -= myedegrees[k].ed-myrinfo->id; + if (ctrl->minconn) + maxndoms = imax(nparts, nads,1); - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut)); + /* Insert the boundary vertices in the priority queue */ + irandArrayPermute(graph->nbnd, perm, graph->nbnd/4, 1); + for (ii=0; iinbnd; ii++) { + i = bndind[perm[ii]]; + ipqInsert(queue, i, graph->vkrinfo[i].gv); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(nupd, updind, updptr, i); + } - /* Update where, weight, and ID/ED information of the vertex you moved */ - where[i] = to; - INC_DEC(pwgts[to], pwgts[from], vwgt); - myrinfo->ed += myrinfo->id-myedegrees[k].ed; - SWAP(myrinfo->id, myedegrees[k].ed, j); - if (myedegrees[k].ed == 0) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].pid = from; + /* Start extracting vertices from the queue and try to move them */ + for (nmoved=0, iii=0;;iii++) { + if ((i = ipqGetTop(queue)) == -1) + break; + vstatus[i] = VPQSTATUS_EXTRACTED; - if (myrinfo->ed < myrinfo->id) - BNDDelete(nbnd, bndind, bndptr, i); + myrinfo = graph->vkrinfo+i; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; - /* Update the degrees of adjacent vertices */ - for (j=xadj[i]; jvwgt[i]; - myrinfo = graph->rinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = 
ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; + /* Prevent moves that make 'from' domain underbalanced */ + if (omode == OMODE_REFINE) { + if (myrinfo->nid > 0 && pwgts[from]-vwgt < minpwgts[from]) + continue; + } + else { /* OMODE_BALANCE */ + if (pwgts[from]-vwgt < minpwgts[from]) + continue; + } - ASSERT(CheckRInfo(myrinfo)); + if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk)) + continue; - oldgain = (myrinfo->ed-myrinfo->id); + if (ctrl->minconn) + SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms); - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); + xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? graph->vsize[i] : 0); - if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1) - BNDInsert(nbnd, bndind, bndptr, ii); + /* Find the most promising subdomain to move to */ + if (omode == OMODE_REFINE) { + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + gain = mynbrs[k].gv + xgain; + if (gain >= 0 && pwgts[to]+vwgt <= maxpwgts[to]+ffactor*gain) + break; } - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); + if (k < 0) + continue; /* break out if you did not find a candidate */ - if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1) - BNDDelete(nbnd, bndind, bndptr, ii); + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + gain = mynbrs[j].gv + xgain; + if ((mynbrs[j].gv > mynbrs[k].gv && + pwgts[to]+vwgt <= maxpwgts[to]+ffactor*gain) + || + (mynbrs[j].gv == mynbrs[k].gv && + mynbrs[j].ned > mynbrs[k].ned && + pwgts[to]+vwgt <= maxpwgts[to]) + || + (mynbrs[j].gv == mynbrs[k].gv && + mynbrs[j].ned == mynbrs[k].ned && + tpwgts[mynbrs[k].pid]*pwgts[to] < tpwgts[to]*pwgts[mynbrs[k].pid]) + ) + k = j; } + to = mynbrs[k].pid; - /* Remove contribution from the .ed of 'from' */ - if (me != from) { - for (k=0; kndegrees; k++) { - if 
(myedegrees[k].pid == from) { - if (myedegrees[k].ed == adjwgt[j]) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].ed -= adjwgt[j]; - break; - } - } + ASSERT(xgain+mynbrs[k].gv >= 0); + + j = 0; + if (xgain+mynbrs[k].gv > 0 || mynbrs[k].ned-myrinfo->nid > 0) + j = 1; + else if (mynbrs[k].ned-myrinfo->nid == 0) { + if ((iii%2 == 0 && safetos[to] == 2) || + pwgts[from] >= maxpwgts[from] || + tpwgts[from]*(pwgts[to]+vwgt) < tpwgts[to]*pwgts[from]) + j = 1; + } + if (j == 0) + continue; + } + else { /* OMODE_BALANCE */ + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + if (pwgts[to]+vwgt <= maxpwgts[to] || + tpwgts[from]*(pwgts[to]+vwgt) <= tpwgts[to]*pwgts[from]) + break; } + if (k < 0) + continue; /* break out if you did not find a candidate */ - /* Add contribution to the .ed of 'to' */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if (tpwgts[mynbrs[k].pid]*pwgts[to] < tpwgts[to]*pwgts[mynbrs[k].pid]) + k = j; } + to = mynbrs[k].pid; - /* Update the queue */ - if (me == to || me == from) { - gain = myrinfo->ed-myrinfo->id; - if (moved[ii] == 2) { - if (gain >= 0) - PQueueUpdate(&queue, ii, oldgain, gain); - else { - PQueueDelete(&queue, ii, oldgain); - moved[ii] = -1; - } - } - else if (moved[ii] == -1 && gain >= 0) { - PQueueInsert(&queue, ii, gain); - moved[ii] = 2; - } - } + if (pwgts[from] < maxpwgts[from] && pwgts[to] > minpwgts[to] && + (xgain+mynbrs[k].gv < 0 || + (xgain+mynbrs[k].gv == 0 && mynbrs[k].ned-myrinfo->nid < 0)) + ) + continue; + } + + + /*===================================================================== + * If we got here, we can now move the vertex from 'from' to 'to' + 
*======================================================================*/ + INC_DEC(pwgts[to], pwgts[from], vwgt); + graph->mincut -= mynbrs[k].ned-myrinfo->nid; + graph->minvol -= (xgain+mynbrs[k].gv); + where[i] = to; + nmoved++; - ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]); - ASSERT(CheckRInfo(myrinfo)); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX". " + "Gain: [%4"PRIDX" %4"PRIDX"]. Cut: %6"PRIDX", Vol: %6"PRIDX"\n", + i, from, to, xgain+mynbrs[k].gv, mynbrs[k].ned-myrinfo->nid, + graph->mincut, graph->minvol)); + /* Update the subdomain connectivity information */ + if (ctrl->minconn) { + /* take care of i's move itself */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->nid-mynbrs[k].ned, &maxndoms); + + /* take care of the adjacent vertices */ + for (j=xadj[i]; jnbnd = nbnd; - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. Cut: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, graph->mincut)); + /* Reset the vstatus and associated data structures */ + for (i=0; idbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." 
+ " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX, + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nbnd, nmoved, graph->mincut, graph->minvol); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } - if (graph->mincut == oldcut) + if (nmoved == 0 || + (omode == OMODE_REFINE && graph->minvol == oldvol && graph->mincut == oldcut)) break; } - PQueueFree(ctrl, &queue); - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); + ipqDestroy(queue); + WCOREPOP; } -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Greedy_KWayEdgeBalance(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor, int npasses) +/*************************************************************************/ +/*! K-way partitioning optimization in which the vertices are visited in + decreasing ed/sqrt(nnbrs)-id order. Note this is just an + approximation, as the ed is often split across different subdomains + and the sqrt(nnbrs) is just a crude approximation. + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. 
+ \param omode is the type of optimization that will performed among + OMODE_REFINE and OMODE_BALANCE + + +*/ +/**************************************************************************/ +void Greedy_McKWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) { - int i, ii, iii, j, jj, k, l, pass, nvtxs, nbnd, tvwgt, myndegrees, oldgain, gain, nmoves; - int from, me, to, oldcut, vwgt; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *moved, *itpwgts; - EDegreeType *myedegrees; - RInfoType *myrinfo; - PQueueType queue; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, pass, nvtxs, ncon, nparts, gain; + idx_t from, me, to, cto, oldcut; + idx_t *xadj, *vwgt, *adjncy, *adjwgt; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nmoved, nupd, *vstatus, *updptr, *updind; + idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL; + idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL; + idx_t bndtype = (omode == OMODE_REFINE ? BNDTYPE_REFINE : BNDTYPE_BALANCE); + real_t *ubfactors, *pijbm; + real_t origbal; + + /* Edgecut-specific/different variables */ + idx_t nbnd, oldnnbrs; + rpq_t *queue; + real_t rgain; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; @@ -470,203 +742,1479 @@ void Greedy_KWayEdgeBalance(CtrlType *ctrl, GraphType *graph, int nparts, float where = graph->where; pwgts = graph->pwgts; + nparts = ctrl->nparts; + pijbm = ctrl->pijbm; + + + /* Determine the ubfactors. The method used is different based on omode. + When OMODE_BALANCE, the ubfactors are those supplied by the user. 
+ When OMODE_REFINE, the ubfactors are the max of the current partition + and the user-specified ones. */ + ubfactors = rwspacemalloc(ctrl, ncon); + ComputeLoadImbalanceVec(graph, nparts, pijbm, ubfactors); + origbal = rvecmaxdiff(ncon, ubfactors, ctrl->ubfactors); + if (omode == OMODE_BALANCE) { + rcopy(ncon, ctrl->ubfactors, ubfactors); + } + else { + for (i=0; i ctrl->ubfactors[i] ? ubfactors[i] : ctrl->ubfactors[i]); + } + + /* Setup the weight intervals of the various subdomains */ - minwgt = idxwspacemalloc(ctrl, nparts); - maxwgt = idxwspacemalloc(ctrl, nparts); - itpwgts = idxwspacemalloc(ctrl, nparts); - tvwgt = idxsum(nparts, pwgts); - ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt)); + minpwgts = iwspacemalloc(ctrl, nparts*ncon); + maxpwgts = iwspacemalloc(ctrl, nparts*ncon); for (i=0; itpwgts[i*ncon+j]*graph->tvwgt[j]*ubfactors[j]; + /*minpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*(.9/ubfactors[j]);*/ + minpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*.2; + } } - perm = idxwspacemalloc(ctrl, nvtxs); - moved = idxwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); - PQueueInit(ctrl, &queue, nvtxs, graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d]-[%6d %6d], Balance: %5.3f, Nv-Nb[%6d %6d]. Cut: %6d [B]\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd, - graph->mincut)); + /* This stores the valid target subdomains. It is used when ctrl->minconn to + control the subdomains to which moves are allowed to be made. + When ctrl->minconn is false, the default values of 2 allow all moves to + go through and it does not interfere with the zero-gain move selection. 
*/ + safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts)); - for (pass=0; passmincut); + if (ctrl->minconn) { + ComputeSubDomainGraph(ctrl, graph); - /* Check to see if things are out of balance, given the tolerance */ - for (i=0; i maxwgt[i]) - break; - } - if (i == nparts) /* Things are balanced. Return right away */ - break; + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; + doms = iset(nparts, 0, ctrl->pvec1); + } + + + /* Setup updptr, updind like boundary info to keep track of the vertices whose + vstatus's need to be reset at the end of the inner iteration */ + vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs)); + updptr = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + updind = iwspacemalloc(ctrl, nvtxs); + + if (ctrl->contig) { + /* The arrays that will be used for limited check of articulation points */ + bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + bfsind = iwspacemalloc(ctrl, nvtxs); + bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("%s: [%6"PRIDX" %6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"(%.3"PRREAL")," + " Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %6"PRIDX", (%"PRIDX")", + (omode == OMODE_REFINE ? 
"GRC" : "GBC"), + imin(nparts*ncon, pwgts,1), imax(nparts*ncon, pwgts,1), imax(nparts*ncon, maxpwgts,1), + ComputeLoadImbalance(graph, nparts, pijbm), origbal, + graph->nvtxs, graph->nbnd, graph->mincut, niter); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + queue = rpqCreate(nvtxs); - PQueueReset(&queue); - idxset(nvtxs, -1, moved); + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; passmincut); + if (omode == OMODE_REFINE) + ASSERT(CheckBnd2(graph)); + + /* In balancing mode, exit as soon as balance is reached */ + if (omode == OMODE_BALANCE && IsBalanced(ctrl, graph, 0)) + break; + oldcut = graph->mincut; - nbnd = graph->nbnd; + nbnd = graph->nbnd; + nupd = 0; + + if (ctrl->minconn) + maxndoms = imax(nparts, nads,1); - RandomPermute(nbnd, perm, 1); + /* Insert the boundary vertices in the priority queue */ + irandArrayPermute(nbnd, perm, nbnd/4, 1); for (ii=0; iirinfo[i].ed - graph->rinfo[i].id); - moved[i] = 2; + rgain = (graph->ckrinfo[i].nnbrs > 0 ? + 1.0*graph->ckrinfo[i].ed/sqrt(graph->ckrinfo[i].nnbrs) : 0.0) + - graph->ckrinfo[i].id; + rpqInsert(queue, i, rgain); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(nupd, updind, updptr, i); } - nmoves = 0; - for (;;) { - if ((i = PQueueGetMax(&queue)) == -1) + /* Start extracting vertices from the queue and try to move them */ + for (nmoved=0, iii=0;;iii++) { + if ((i = rpqGetTop(queue)) == -1) break; - moved[i] = 1; + vstatus[i] = VPQSTATUS_EXTRACTED; + + myrinfo = graph->ckrinfo+i; + mynbrs = ctrl->cnbrpool + myrinfo->inbr; - myrinfo = graph->rinfo+i; from = where[i]; - vwgt = graph->vwgt[i]; - if (pwgts[from]-vwgt < minwgt[from]) - continue; /* This cannot be moved! 
*/ + /* Prevent moves that make 'from' domain underbalanced */ + if (omode == OMODE_REFINE) { + if (myrinfo->id > 0 && + !ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minpwgts+from*ncon)) + continue; + } + else { /* OMODE_BALANCE */ + if (!ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minpwgts+from*ncon)) + continue; + } + + if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk)) + continue; - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; + if (ctrl->minconn) + SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms); - for (k=0; knnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + gain = mynbrs[k].ed-myrinfo->id; + if (gain >= 0 && ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + cto = to; + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if ((mynbrs[j].ed > mynbrs[k].ed && + ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + || + (mynbrs[j].ed == mynbrs[k].ed && + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + 1, pwgts+cto*ncon, pijbm+cto*ncon, + 1, pwgts+to*ncon, pijbm+to*ncon))) { + k = j; + cto = to; + } + } + to = cto; + + gain = mynbrs[k].ed-myrinfo->id; + if (!(gain > 0 + || (gain == 0 + && (BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon) + || (iii%2 == 0 && safetos[to] == 2) + ) + ) + ) + ) + continue; } - if (k == myndegrees) - continue; /* break out if you did not find a candidate */ + else { /* OMODE_BALANCE */ + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + if (ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon) || + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + break; + } + if (k < 0) 
+ continue; /* break out if you did not find a candidate */ + + cto = to; + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if (BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + 1, pwgts+cto*ncon, pijbm+cto*ncon, + 1, pwgts+to*ncon, pijbm+to*ncon)) { + k = j; + cto = to; + } + } + to = cto; - for (j=k+1; jid < 0 && + !BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + continue; } - to = myedegrees[k].pid; - if (pwgts[from] < maxwgt[from] && pwgts[to] > minwgt[to] && myedegrees[k].ed-myrinfo->id < 0) - continue; /*===================================================================== * If we got here, we can now move the vertex from 'from' to 'to' *======================================================================*/ - graph->mincut -= myedegrees[k].ed-myrinfo->id; + graph->mincut -= mynbrs[k].ed-myrinfo->id; + nmoved++; - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" to %3"PRIDX". Gain: %4"PRIDX". 
Cut: %6"PRIDX"\n", + i, to, mynbrs[k].ed-myrinfo->id, graph->mincut)); - /* Update where, weight, and ID/ED information of the vertex you moved */ - where[i] = to; - INC_DEC(pwgts[to], pwgts[from], vwgt); - myrinfo->ed += myrinfo->id-myedegrees[k].ed; - SWAP(myrinfo->id, myedegrees[k].ed, j); - if (myedegrees[k].ed == 0) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].pid = from; + /* Update the subdomain connectivity information */ + if (ctrl->minconn) { + /* take care of i's move itself */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, &maxndoms); - if (myrinfo->ed == 0) - BNDDelete(nbnd, bndind, bndptr, i); + /* take care of the adjacent vertices */ + for (j=xadj[i]; jckrinfo+ii; - myrinfo = graph->rinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; + oldnnbrs = myrinfo->nnbrs; - ASSERT(CheckRInfo(myrinfo)); + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype); - oldgain = (myrinfo->ed-myrinfo->id); + UpdateQueueInfo(queue, vstatus, ii, me, from, to, myrinfo, oldnnbrs, + nupd, updptr, updind, bndtype); - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + } - if (myrinfo->ed > 0 && bndptr[ii] == -1) - BNDInsert(nbnd, bndind, bndptr, ii); - } - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); + graph->nbnd = nbnd; - if (myrinfo->ed == 0 && bndptr[ii] != -1) - BNDDelete(nbnd, bndind, bndptr, ii); - } + /* Reset the vstatus and associated data structures */ + for (i=0; indegrees; k++) { - if (myedegrees[k].pid == from) { - if (myedegrees[k].ed == adjwgt[j]) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].ed -= adjwgt[j]; - break; - } - } - } + if (ctrl->dbglvl&METIS_DBG_REFINE) { + 
printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." + " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX, + imin(nparts*ncon, pwgts,1), imax(nparts*ncon, pwgts,1), + ComputeLoadImbalance(graph, nparts, pijbm), + graph->nbnd, nmoved, graph->mincut, ComputeVolume(graph, where)); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } - /* Add contribution to the .ed of 'to' */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } - } + if (nmoved == 0 || (omode == OMODE_REFINE && graph->mincut == oldcut)) + break; + } - /* Update the queue */ - if (me == to || me == from) { - gain = myrinfo->ed-myrinfo->id; - if (moved[ii] == 2) { - if (myrinfo->ed > 0) - PQueueUpdate(&queue, ii, oldgain, gain); - else { - PQueueDelete(&queue, ii, oldgain); - moved[ii] = -1; + rpqDestroy(queue); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! K-way refinement that minimizes the communication volume. This is a + greedy routine and the vertices are visited in decreasing gv order. + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. 
+ +*/ +/**************************************************************************/ +void Greedy_McKWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, pass, nvtxs, ncon, nparts, gain; + idx_t from, me, to, cto, oldcut; + idx_t *xadj, *vwgt, *adjncy; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nmoved, nupd, *vstatus, *updptr, *updind; + idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL; + idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL; + idx_t bndtype = (omode == OMODE_REFINE ? BNDTYPE_REFINE : BNDTYPE_BALANCE); + real_t *ubfactors, *pijbm; + real_t origbal; + + /* Volume-specific/different variables */ + ipq_t *queue; + idx_t oldvol, xgain; + idx_t *vmarker, *pmarker, *modind; + vkrinfo_t *myrinfo; + vnbr_t *mynbrs; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + bndptr = graph->bndptr; + bndind = graph->bndind; + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + pijbm = ctrl->pijbm; + + + /* Determine the ubfactors. The method used is different based on omode. + When OMODE_BALANCE, the ubfactors are those supplied by the user. + When OMODE_REFINE, the ubfactors are the max of the current partition + and the user-specified ones. */ + ubfactors = rwspacemalloc(ctrl, ncon); + ComputeLoadImbalanceVec(graph, nparts, pijbm, ubfactors); + origbal = rvecmaxdiff(ncon, ubfactors, ctrl->ubfactors); + if (omode == OMODE_BALANCE) { + rcopy(ncon, ctrl->ubfactors, ubfactors); + } + else { + for (i=0; i ctrl->ubfactors[i] ? 
ubfactors[i] : ctrl->ubfactors[i]); + } + + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts*ncon); + maxpwgts = iwspacemalloc(ctrl, nparts*ncon); + + for (i=0; itpwgts[i*ncon+j]*graph->tvwgt[j]*ubfactors[j]; + /*minpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*(.9/ubfactors[j]); */ + minpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*.2; + } + } + + perm = iwspacemalloc(ctrl, nvtxs); + + + /* This stores the valid target subdomains. It is used when ctrl->minconn to + control the subdomains to which moves are allowed to be made. + When ctrl->minconn is false, the default values of 2 allow all moves to + go through and it does not interfere with the zero-gain move selection. */ + safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts)); + + if (ctrl->minconn) { + ComputeSubDomainGraph(ctrl, graph); + + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; + doms = iset(nparts, 0, ctrl->pvec1); + } + + + /* Setup updptr, updind like boundary info to keep track of the vertices whose + vstatus's need to be reset at the end of the inner iteration */ + vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs)); + updptr = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + updind = iwspacemalloc(ctrl, nvtxs); + + if (ctrl->contig) { + /* The arrays that will be used for limited check of articulation points */ + bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + bfsind = iwspacemalloc(ctrl, nvtxs); + bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + } + + /* Vol-refinement specific working arrays */ + modind = iwspacemalloc(ctrl, nvtxs); + vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("%s: [%6"PRIDX" %6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"(%.3"PRREAL")," + ", Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %5"PRIDX", Vol: %5"PRIDX", (%"PRIDX")", + (omode == OMODE_REFINE ? 
"GRV" : "GBV"), + imin(nparts*ncon, pwgts,1), imax(nparts*ncon, pwgts,1), imax(nparts*ncon, maxpwgts,1), + ComputeLoadImbalance(graph, nparts, pijbm), origbal, + graph->nvtxs, graph->nbnd, graph->mincut, graph->minvol, niter); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + queue = ipqCreate(nvtxs); + + + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; passminvol); + + /* In balancing mode, exit as soon as balance is reached */ + if (omode == OMODE_BALANCE && IsBalanced(ctrl, graph, 0)) + break; + + oldcut = graph->mincut; + oldvol = graph->minvol; + nupd = 0; + + if (ctrl->minconn) + maxndoms = imax(nparts, nads,1); + + /* Insert the boundary vertices in the priority queue */ + irandArrayPermute(graph->nbnd, perm, graph->nbnd/4, 1); + for (ii=0; iinbnd; ii++) { + i = bndind[perm[ii]]; + ipqInsert(queue, i, graph->vkrinfo[i].gv); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(nupd, updind, updptr, i); + } + + /* Start extracting vertices from the queue and try to move them */ + for (nmoved=0, iii=0;;iii++) { + if ((i = ipqGetTop(queue)) == -1) + break; + vstatus[i] = VPQSTATUS_EXTRACTED; + + myrinfo = graph->vkrinfo+i; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + from = where[i]; + + /* Prevent moves that make 'from' domain underbalanced */ + if (omode == OMODE_REFINE) { + if (myrinfo->nid > 0 && + !ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minpwgts+from*ncon)) + continue; + } + else { /* OMODE_BALANCE */ + if (!ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minpwgts+from*ncon)) + continue; + } + + if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk)) + continue; + + if (ctrl->minconn) + SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms); + + xgain = 
(myrinfo->nid == 0 && myrinfo->ned > 0 ? graph->vsize[i] : 0); + + /* Find the most promising subdomain to move to */ + if (omode == OMODE_REFINE) { + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + gain = mynbrs[k].gv + xgain; + if (gain >= 0 && ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + cto = to; + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + gain = mynbrs[j].gv + xgain; + if ((mynbrs[j].gv > mynbrs[k].gv && + ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + || + (mynbrs[j].gv == mynbrs[k].gv && + mynbrs[j].ned > mynbrs[k].ned && + ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + || + (mynbrs[j].gv == mynbrs[k].gv && + mynbrs[j].ned == mynbrs[k].ned && + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + 1, pwgts+cto*ncon, pijbm+cto*ncon, + 1, pwgts+to*ncon, pijbm+to*ncon))) { + k = j; + cto = to; + } + } + to = cto; + + j = 0; + if (xgain+mynbrs[k].gv > 0 || mynbrs[k].ned-myrinfo->nid > 0) + j = 1; + else if (mynbrs[k].ned-myrinfo->nid == 0) { + if ((iii%2 == 0 && safetos[to] == 2) || + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + j = 1; + } + if (j == 0) + continue; + } + else { /* OMODE_BALANCE */ + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + if (ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon) || + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + cto = to; + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if (BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + 1, pwgts+cto*ncon, pijbm+cto*ncon, + 1, pwgts+to*ncon, 
pijbm+to*ncon)) { + k = j; + cto = to; + } + } + to = cto; + + if ((xgain+mynbrs[k].gv < 0 || + (xgain+mynbrs[k].gv == 0 && mynbrs[k].ned-myrinfo->nid < 0)) + && + !BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + continue; + } + + + /*===================================================================== + * If we got here, we can now move the vertex from 'from' to 'to' + *======================================================================*/ + graph->mincut -= mynbrs[k].ned-myrinfo->nid; + graph->minvol -= (xgain+mynbrs[k].gv); + where[i] = to; + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX". " + "Gain: [%4"PRIDX" %4"PRIDX"]. Cut: %6"PRIDX", Vol: %6"PRIDX"\n", + i, from, to, xgain+mynbrs[k].gv, mynbrs[k].ned-myrinfo->nid, + graph->mincut, graph->minvol)); + + /* Update the subdomain connectivity information */ + if (ctrl->minconn) { + /* take care of i's move itself */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->nid-mynbrs[k].ned, &maxndoms); + + /* take care of the adjacent vertices */ + for (j=xadj[i]; jdbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." + " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX, + imin(nparts*ncon, pwgts,1), imax(nparts*ncon, pwgts,1), + ComputeLoadImbalance(graph, nparts, pijbm), + graph->nbnd, nmoved, graph->mincut, graph->minvol); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + if (nmoved == 0 || + (omode == OMODE_REFINE && graph->minvol == oldvol && graph->mincut == oldcut)) + break; + } + + ipqDestroy(queue); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function performs an approximate articulation vertex test. 
+ It assumes that the bfslvl, bfsind, and bfsmrk arrays are initialized + appropriately. */ +/*************************************************************************/ +idx_t IsArticulationNode(idx_t i, idx_t *xadj, idx_t *adjncy, idx_t *where, + idx_t *bfslvl, idx_t *bfsind, idx_t *bfsmrk) +{ + idx_t ii, j, k=0, head, tail, nhits, tnhits, from, BFSDEPTH=5; + + from = where[i]; + + /* Determine if the vertex is safe to move from a contiguity standpoint */ + for (tnhits=0, j=xadj[i]; jxadj; + adjncy = graph->adjncy; + vsize = graph->vsize; + where = graph->where; + + myrinfo = graph->vkrinfo+v; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + + /*====================================================================== + * Remove the contributions on the gain made by 'v'. + *=====================================================================*/ + for (k=0; knnbrs; k++) + pmarker[mynbrs[k].pid] = k; + pmarker[from] = k; + + myidx = pmarker[to]; /* Keep track of the index in mynbrs of the 'to' domain */ + + for (j=xadj[v]; jvkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + if (other == from) { + for (k=0; knnbrs; k++) { + if (pmarker[onbrs[k].pid] == -1) + onbrs[k].gv += vsize[v]; + } + } + else { + ASSERT(pmarker[other] != -1); + + if (mynbrs[pmarker[other]].ned > 1) { + for (k=0; knnbrs; k++) { + if (pmarker[onbrs[k].pid] == -1) + onbrs[k].gv += vsize[v]; + } + } + else { /* There is only one connection */ + for (k=0; knnbrs; k++) { + if (pmarker[onbrs[k].pid] != -1) + onbrs[k].gv -= vsize[v]; + } + } + } + } + + for (k=0; knnbrs; k++) + pmarker[mynbrs[k].pid] = -1; + pmarker[from] = -1; + + + /*====================================================================== + * Update the id/ed of vertex 'v' + *=====================================================================*/ + if (myidx == -1) { + myidx = myrinfo->nnbrs++; + ASSERT(myidx < xadj[v+1]-xadj[v]); + mynbrs[myidx].ned = 0; + } + myrinfo->ned += myrinfo->nid-mynbrs[myidx].ned; + SWAP(myrinfo->nid, 
mynbrs[myidx].ned, j); + if (mynbrs[myidx].ned == 0) + mynbrs[myidx] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[myidx].pid = from; + + + /*====================================================================== + * Update the degrees of adjacent vertices and their volume gains + *=====================================================================*/ + vmarker[v] = 1; + modind[0] = v; + nmod = 1; + for (j=xadj[v]; jvkrinfo+ii; + if (myrinfo->inbr == -1) + myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[ii+1]-xadj[ii]); + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + if (me == from) { + INC_DEC(myrinfo->ned, myrinfo->nid, 1); + } + else if (me == to) { + INC_DEC(myrinfo->nid, myrinfo->ned, 1); + } + + /* Remove the edgeweight from the 'pid == from' entry of the vertex */ + if (me != from) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == from) { + if (mynbrs[k].ned == 1) { + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + vmarker[ii] = 1; /* You do a complete .gv calculation */ + + /* All vertices adjacent to 'ii' need to be updated */ + for (jj=xadj[ii]; jjvkrinfo+u; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + for (kk=0; kknnbrs; kk++) { + if (onbrs[kk].pid == from) { + onbrs[kk].gv -= vsize[ii]; + if (!vmarker[u]) { /* Need to update boundary etc */ + vmarker[u] = 2; + modind[nmod++] = u; + } + break; + } + } + } + } + else { + mynbrs[k].ned--; + + /* Update the gv due to single 'ii' connection to 'from' */ + if (mynbrs[k].ned == 1) { + /* find the vertex 'u' that 'ii' was connected into 'from' */ + for (jj=xadj[ii]; jjvkrinfo+u; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + /* The following is correct because domains in common + between ii and u will lead to a reduction over the + previous gain, whereas domains only in u but not in + ii, will lead to no change as opposed to the earlier + increase */ + for (kk=0; kknnbrs; kk++) + onbrs[kk].gv += vsize[ii]; + + if (!vmarker[u]) { /* Need to update boundary etc */ + vmarker[u] = 2; + modind[nmod++] = u; + } + break; + } + } + } 
+ } + break; + } + } + } + + + /* Add the edgeweight to the 'pid == to' entry of the vertex */ + if (me != to) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) { + mynbrs[k].ned++; + + /* Update the gv due to non-single 'ii' connection to 'to' */ + if (mynbrs[k].ned == 2) { + /* find the vertex 'u' that 'ii' was connected into 'to' */ + for (jj=xadj[ii]; jjvkrinfo+u; + onbrs = ctrl->vnbrpool + orinfo->inbr; + for (kk=0; kknnbrs; kk++) + onbrs[kk].gv -= vsize[ii]; + + if (!vmarker[u]) { /* Need to update boundary etc */ + vmarker[u] = 2; + modind[nmod++] = u; + } + break; + } + } + } + break; + } + } + + if (k == myrinfo->nnbrs) { + mynbrs[myrinfo->nnbrs].pid = to; + mynbrs[myrinfo->nnbrs++].ned = 1; + vmarker[ii] = 1; /* You do a complete .gv calculation */ + + /* All vertices adjacent to 'ii' need to be updated */ + for (jj=xadj[ii]; jjvkrinfo+u; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + for (kk=0; kknnbrs; kk++) { + if (onbrs[kk].pid == to) { + onbrs[kk].gv += vsize[ii]; + if (!vmarker[u]) { /* Need to update boundary etc */ + vmarker[u] = 2; + modind[nmod++] = u; + } + break; + } + } + } + } + } + + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + + + /*====================================================================== + * Add the contributions on the volume gain due to 'v' + *=====================================================================*/ + myrinfo = graph->vkrinfo+v; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + for (k=0; knnbrs; k++) + pmarker[mynbrs[k].pid] = k; + pmarker[to] = k; + + for (j=xadj[v]; jvkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + if (other == to) { + for (k=0; knnbrs; k++) { + if (pmarker[onbrs[k].pid] == -1) + onbrs[k].gv -= vsize[v]; + } + } + else { + ASSERT(pmarker[other] != -1); + + if (mynbrs[pmarker[other]].ned > 1) { + for (k=0; knnbrs; k++) { + if (pmarker[onbrs[k].pid] == -1) + onbrs[k].gv -= vsize[v]; + } + } + else { /* There is only one connection */ + for (k=0; knnbrs; k++) { + if 
(pmarker[onbrs[k].pid] != -1) + onbrs[k].gv += vsize[v]; + } + } + } + } + for (k=0; knnbrs; k++) + pmarker[mynbrs[k].pid] = -1; + pmarker[to] = -1; + + + /*====================================================================== + * Recompute the volume information of the 'hard' nodes, and update the + * max volume gain for all the modified vertices and the priority queue + *=====================================================================*/ + for (iii=0; iiivkrinfo+i; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + if (vmarker[i] == 1) { /* Only complete gain updates go through */ + for (k=0; knnbrs; k++) + mynbrs[k].gv = 0; + + for (j=xadj[i]; jvkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + for (kk=0; kknnbrs; kk++) + pmarker[onbrs[kk].pid] = kk; + pmarker[other] = 1; + + if (me == other) { + /* Find which domains 'i' is connected and 'ii' is not and update their gain */ + for (k=0; knnbrs; k++) { + if (pmarker[mynbrs[k].pid] == -1) + mynbrs[k].gv -= vsize[ii]; + } + } + else { + ASSERT(pmarker[me] != -1); + + /* I'm the only connection of 'ii' in 'me' */ + if (onbrs[pmarker[me]].ned == 1) { + /* Increase the gains for all the common domains between 'i' and 'ii' */ + for (k=0; knnbrs; k++) { + if (pmarker[mynbrs[k].pid] != -1) + mynbrs[k].gv += vsize[ii]; } } - else if (moved[ii] == -1 && myrinfo->ed > 0) { - PQueueInsert(&queue, ii, gain); - moved[ii] = 2; + else { + /* Find which domains 'i' is connected and 'ii' is not and update their gain */ + for (k=0; knnbrs; k++) { + if (pmarker[mynbrs[k].pid] == -1) + mynbrs[k].gv -= vsize[ii]; + } } - } + } - ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]); - ASSERT(CheckRInfo(myrinfo)); + for (kk=0; kknnbrs; kk++) + pmarker[onbrs[kk].pid] = -1; + pmarker[other] = -1; + } - nmoves++; } - graph->nbnd = nbnd; + /* Compute the overall gv for that node */ + myrinfo->gv = IDX_MIN; + for (k=0; knnbrs; k++) { + if (mynbrs[k].gv > myrinfo->gv) + myrinfo->gv = mynbrs[k].gv; + } + + /* Add the xtra gain due to id == 
0 */ + if (myrinfo->ned > 0 && myrinfo->nid == 0) + myrinfo->gv += vsize[i]; + + + /*====================================================================== + * Maintain a consistent boundary + *=====================================================================*/ + if (bndtype == BNDTYPE_REFINE) { + if (myrinfo->gv >= 0 && graph->bndptr[i] == -1) + BNDInsert(graph->nbnd, graph->bndind, graph->bndptr, i); + + if (myrinfo->gv < 0 && graph->bndptr[i] != -1) + BNDDelete(graph->nbnd, graph->bndind, graph->bndptr, i); + } + else { + if (myrinfo->ned > 0 && graph->bndptr[i] == -1) + BNDInsert(graph->nbnd, graph->bndind, graph->bndptr, i); + + if (myrinfo->ned == 0 && graph->bndptr[i] != -1) + BNDDelete(graph->nbnd, graph->bndind, graph->bndptr, i); + } + + + /*====================================================================== + * Update the priority queue appropriately (if allowed) + *=====================================================================*/ + if (queue != NULL) { + if (vstatus[i] != VPQSTATUS_EXTRACTED) { + if (graph->bndptr[i] != -1) { /* In-boundary vertex */ + if (vstatus[i] == VPQSTATUS_PRESENT) { + ipqUpdate(queue, i, myrinfo->gv); + } + else { + ipqInsert(queue, i, myrinfo->gv); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(*r_nupd, updind, updptr, i); + } + } + else { /* Off-boundary vertex */ + if (vstatus[i] == VPQSTATUS_PRESENT) { + ipqDelete(queue, i); + vstatus[i] = VPQSTATUS_NOTPRESENT; + ListDelete(*r_nupd, updind, updptr, i); + } + } + } + } + + vmarker[i] = 0; + } +} + + +/*************************************************************************/ +/*! K-way partitioning optimization in which the vertices are visited in + decreasing ed/sqrt(nnbrs)-id order. Note this is just an + approximation, as the ed is often split across different subdomains + and the sqrt(nnbrs) is just a crude approximation. + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. 
+ \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. + \param omode is the type of optimization that will performed among + OMODE_REFINE and OMODE_BALANCE + + +*/ +/**************************************************************************/ +void Greedy_KWayEdgeStats(ctrl_t *ctrl, graph_t *graph) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, nvtxs, nparts, gain, u, v, uw, vw; + idx_t *xadj, *adjncy, *adjwgt, *vwgt; + idx_t *where, *pwgts, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nbnd; + ckrinfo_t *urinfo, *vrinfo; + cnbr_t *unbrs, *vnbrs; + real_t *tpwgts, ubfactor; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + vwgt = graph->vwgt; + adjwgt = graph->adjwgt; + + bndind = graph->bndind; + bndptr = graph->bndptr; + + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts); + maxpwgts = iwspacemalloc(ctrl, nparts); + + ubfactor = ctrl->ubfactors[0]; + for (i=0; itvwgt[0]*ubfactor; + minpwgts[i] = tpwgts[i]*graph->tvwgt[0]*(0.95/ubfactor); + } + + /* go and determine the positive gain valid swaps */ + nbnd = graph->nbnd; + + for (ii=0; iickrinfo+u; + unbrs = ctrl->cnbrpool + urinfo->inbr; + + for (j=xadj[u]; jckrinfo+v; + vnbrs = ctrl->cnbrpool + vrinfo->inbr; + + if (uw == vw) + continue; + if (pwgts[uw] - vwgt[u] + vwgt[v] > maxpwgts[uw] || + pwgts[vw] - vwgt[v] + vwgt[u] > maxpwgts[vw]) + continue; + + for (k=urinfo->nnbrs-1; k>=0; k--) { + if (unbrs[k].pid == vw) + break; + } + if (k < 0) + printf("Something went wrong!\n"); + gain = unbrs[k].ed-urinfo->id; + + for (k=vrinfo->nnbrs-1; k>=0; k--) { + if (vnbrs[k].pid == uw) + break; + } + if (k < 0) + printf("Something went wrong!\n"); + gain += vnbrs[k].ed-vrinfo->id; + + 
gain -= 2*adjwgt[j]; - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. Nmoves: %5d, Cut: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves, graph->mincut)); + if (gain > 0) + printf(" Gain: %"PRIDX" for moving (%"PRIDX", %"PRIDX") between (%"PRIDX", %"PRIDX")\n", + gain, u, v, uw, vw); + } + } + + WCOREPOP; +} + + +/*************************************************************************/ +/*! K-way partitioning optimization in which the vertices are visited in + random order and the best edge is selected to swap its incident vertices + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + +*/ +/**************************************************************************/ +void Greedy_KWayEdgeCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t ii, j, k, pass, nvtxs, nparts, u, v, uw, vw, gain, bestgain, jbest; + idx_t from, me, to, oldcut, nmoved; + idx_t *xadj, *adjncy, *adjwgt, *vwgt; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t bndtype = BNDTYPE_REFINE; + real_t *tpwgts, ubfactor; + + /* Edgecut-specific/different variables */ + idx_t nbnd, oldnnbrs; + ckrinfo_t *myrinfo, *urinfo, *vrinfo; + cnbr_t *unbrs, *vnbrs; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + vwgt = graph->vwgt; + + bndind = graph->bndind; + bndptr = graph->bndptr; + + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts); + maxpwgts = iwspacemalloc(ctrl, nparts); + + ubfactor = gk_max(ctrl->ubfactors[0], ComputeLoadImbalance(graph, nparts, ctrl->pijbm)); 
+ for (k=0; ktvwgt[0]*ubfactor; + minpwgts[k] = tpwgts[k]*graph->tvwgt[0]*(1.0/ubfactor); + } + + perm = iwspacemalloc(ctrl, nvtxs); + + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("GRE: [%6"PRIDX" %6"PRIDX"]-[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"," + " Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %6"PRIDX"\n", + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), minpwgts[0], maxpwgts[0], + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nvtxs, graph->nbnd, graph->mincut); } - PQueueFree(ctrl, &queue); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; passmincut); + + oldcut = graph->mincut; + nbnd = graph->nbnd; + nmoved = 0; + + /* Insert the boundary vertices in the priority queue */ + /* Visit the vertices in random order and see if you can swap them */ + irandArrayPermute(nvtxs, perm, nbnd, 1); + for (ii=0; iickrinfo+u; + unbrs = ctrl->cnbrpool + urinfo->inbr; + + bestgain = 0; + jbest = -1; + for (j=xadj[u]; j maxpwgts[uw] || + pwgts[vw] - vwgt[v] + vwgt[u] > maxpwgts[vw]) + continue; + if (pwgts[uw] - vwgt[u] + vwgt[v] < minpwgts[uw] || + pwgts[vw] - vwgt[v] + vwgt[u] < minpwgts[vw]) + continue; + + vrinfo = graph->ckrinfo+v; + vnbrs = ctrl->cnbrpool + vrinfo->inbr; + + gain = -2*adjwgt[j]; + + for (k=urinfo->nnbrs-1; k>=0; k--) { + if (unbrs[k].pid == vw) + break; + } + GKASSERT(k>=0); + gain += unbrs[k].ed-urinfo->id; + + for (k=vrinfo->nnbrs-1; k>=0; k--) { + if (vnbrs[k].pid == uw) + break; + } + GKASSERT(k>=0); + gain += vnbrs[k].ed-vrinfo->id; + + if (gain > bestgain && vnbrs[k].ed > adjwgt[j]) { + bestgain = gain; + jbest = j; + } + } + + if (jbest == -1) + continue; /* no valid positive swap */ + + + 
/*===================================================================== + * If we got here, we can now swap the vertices + *======================================================================*/ + v = adjncy[jbest]; + vw = where[v]; + + vrinfo = graph->ckrinfo+v; + vnbrs = ctrl->cnbrpool + vrinfo->inbr; + + /* move u to v's partition */ + for (k=urinfo->nnbrs-1; k>=0; k--) { + if (unbrs[k].pid == vw) + break; + } + GKASSERT(k>=0); + + from = uw; + to = vw; + + graph->mincut -= unbrs[k].ed-urinfo->id; + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX" [%6"PRIDX" %6"PRIDX"]. Gain: %4"PRIDX". Cut: %6"PRIDX"\n", + u, from, to, pwgts[from], pwgts[to], unbrs[k].ed-urinfo->id, graph->mincut)); + + /* Update ID/ED and BND related information for the moved vertex */ + INC_DEC(pwgts[to], pwgts[from], vwgt[u]); + UpdateMovedVertexInfoAndBND(u, from, k, to, urinfo, unbrs, where, nbnd, + bndptr, bndind, bndtype); + + /* Update the degrees of adjacent vertices */ + for (j=xadj[u]; jckrinfo+ii; + + oldnnbrs = myrinfo->nnbrs; + + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype); + + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + + /* move v to u's partition */ + for (k=vrinfo->nnbrs-1; k>=0; k--) { + if (vnbrs[k].pid == uw) + break; + } + GKASSERT(k>=0); +#ifdef XXX + if (k < 0) { /* that was removed, go and re-insert it */ + k = vrinfo->nnbrs++; + vnbrs[k].pid = uw; + vnbrs[k].ed = 0; + } +#endif + + from = vw; + to = uw; + + graph->mincut -= vnbrs[k].ed-vrinfo->id; + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX" [%6"PRIDX" %6"PRIDX"]. Gain: %4"PRIDX". 
Cut: %6"PRIDX"\n", + v, from, to, pwgts[from], pwgts[to], vnbrs[k].ed-vrinfo->id, graph->mincut)); + + /* Update ID/ED and BND related information for the moved vertex */ + INC_DEC(pwgts[to], pwgts[from], vwgt[v]); + UpdateMovedVertexInfoAndBND(v, from, k, to, vrinfo, vnbrs, where, nbnd, + bndptr, bndind, bndtype); + + /* Update the degrees of adjacent vertices */ + for (j=xadj[v]; jckrinfo+ii; + + oldnnbrs = myrinfo->nnbrs; + + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype); + + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + } + + graph->nbnd = nbnd; + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." + " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX"\n", + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nbnd, nmoved, graph->mincut, ComputeVolume(graph, where)); + } + + if (nmoved == 0 || graph->mincut == oldcut) + break; + } + WCOREPOP; } diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayrefine.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayrefine.c index 61d53212..dec8b65a 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayrefine.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayrefine.c @@ -1,392 +1,682 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * kwayrefine.c - * - * This file contains the driving routines for multilevel k-way refinement - * - * Started 7/28/97 - * George - * - * $Id: kwayrefine.c,v 1.1 2003/07/16 15:55:05 karypis Exp $ - */ - -#include - - -/************************************************************************* -* This function is the entry point of refinement -**************************************************************************/ -void RefineKWay(CtrlType *ctrl, GraphType *orggraph, GraphType 
*graph, int nparts, float *tpwgts, float ubfactor) -{ - int i, nlevels, mustfree=0; - GraphType *ptr; +/*! +\file +\brief Driving routines for multilevel k-way refinement + +\date Started 7/28/1997 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version $Id: kwayrefine.c 20398 2016-11-22 17:17:12Z karypis $ +*/ - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); +#include "metislib.h" - /* Compute the parameters of the coarsest graph */ - ComputeKWayPartitionParams(ctrl, graph, nparts); - - /* Take care any non-contiguity */ - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->AuxTmr1)); - if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN) { - EliminateComponents(ctrl, graph, nparts, tpwgts, 1.25); - EliminateSubDomainEdges(ctrl, graph, nparts, tpwgts); - EliminateComponents(ctrl, graph, nparts, tpwgts, 1.25); - } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->AuxTmr1)); + +/*************************************************************************/ +/*! 
This function is the entry point of cut-based refinement */ +/*************************************************************************/ +void RefineKWay(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph) +{ + idx_t i, nlevels, contig=ctrl->contig; + graph_t *ptr; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->UncoarsenTmr)); /* Determine how many levels are there */ for (ptr=graph, nlevels=0; ptr!=orggraph; ptr=ptr->finer, nlevels++); + /* Compute the parameters of the coarsest graph */ + ComputeKWayPartitionParams(ctrl, graph); + + /* Try to minimize the sub-domain connectivity */ + if (ctrl->minconn) + EliminateSubDomainEdges(ctrl, graph); + + /* Deal with contiguity constraints at the beginning */ + if (contig && FindPartitionInducedComponents(graph, graph->where, NULL, NULL) > ctrl->nparts) { + EliminateComponents(ctrl, graph); + + ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE); + Greedy_KWayOptimize(ctrl, graph, 5, 0, OMODE_BALANCE); + + ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE); + Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 0, OMODE_REFINE); + + ctrl->contig = 0; + } + + /* Refine each successively finer graph */ for (i=0; ;i++) { - /* PrintSubDomainGraph(graph, nparts, graph->where); */ - if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN && (i == nlevels/2 || i == nlevels/2+1)) - EliminateSubDomainEdges(ctrl, graph, nparts, tpwgts); - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RefTmr)); - - if (2*i >= nlevels && !IsBalanced(graph->pwgts, nparts, tpwgts, 1.04*ubfactor)) { - ComputeKWayBalanceBoundary(ctrl, graph, nparts); - if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN) - Greedy_KWayEdgeBalanceMConn(ctrl, graph, nparts, tpwgts, ubfactor, 1); - else - Greedy_KWayEdgeBalance(ctrl, graph, nparts, tpwgts, ubfactor, 1); - ComputeKWayBoundary(ctrl, graph, nparts); + if (ctrl->minconn && i == nlevels/2) + EliminateSubDomainEdges(ctrl, graph); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->RefTmr)); + + if (2*i >= nlevels && 
!IsBalanced(ctrl, graph, .02)) { + ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE); + Greedy_KWayOptimize(ctrl, graph, 1, 0, OMODE_BALANCE); + ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE); } - switch (ctrl->RType) { - case RTYPE_KWAYRANDOM: - Random_KWayEdgeRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10, 1); - break; - case RTYPE_KWAYGREEDY: - Greedy_KWayEdgeRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10); - break; - case RTYPE_KWAYRANDOM_MCONN: - Random_KWayEdgeRefineMConn(ctrl, graph, nparts, tpwgts, ubfactor, 10, 1); - break; + Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 5.0, OMODE_REFINE); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->RefTmr)); + + /* Deal with contiguity constraints in the middle */ + if (contig && i == nlevels/2) { + if (FindPartitionInducedComponents(graph, graph->where, NULL, NULL) > ctrl->nparts) { + EliminateComponents(ctrl, graph); + + if (!IsBalanced(ctrl, graph, .02)) { + ctrl->contig = 1; + ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE); + Greedy_KWayOptimize(ctrl, graph, 5, 0, OMODE_BALANCE); + + ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE); + Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 0, OMODE_REFINE); + ctrl->contig = 0; + } + } } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->RefTmr)); if (graph == orggraph) break; - GKfree((void **)(&graph->gdata), LTERM); /* Deallocate the graph related arrays */ - graph = graph->finer; - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); - if (graph->vwgt == NULL) { - graph->vwgt = idxsmalloc(graph->nvtxs, 1, "RefineKWay: graph->vwgt"); - graph->adjwgt = idxsmalloc(graph->nedges, 1, "RefineKWay: graph->adjwgt"); - mustfree = 1; - } - ProjectKWayPartition(ctrl, graph, nparts); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr)); - } + graph_ReadFromDisk(ctrl, graph); - if (!IsBalanced(graph->pwgts, nparts, tpwgts, ubfactor)) { - ComputeKWayBalanceBoundary(ctrl, graph, nparts); - if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN) { - 
Greedy_KWayEdgeBalanceMConn(ctrl, graph, nparts, tpwgts, ubfactor, 8); - Random_KWayEdgeRefineMConn(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); - } - else { - Greedy_KWayEdgeBalance(ctrl, graph, nparts, tpwgts, ubfactor, 8); - Random_KWayEdgeRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); - } + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ProjectTmr)); + ASSERT(graph->vwgt != NULL); + + ProjectKWayPartition(ctrl, graph); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ProjectTmr)); } - /* Take care any trivial non-contiguity */ - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->AuxTmr2)); - EliminateComponents(ctrl, graph, nparts, tpwgts, ubfactor); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->AuxTmr2)); + /* Deal with contiguity requirement at the end */ + ctrl->contig = contig; + if (contig && FindPartitionInducedComponents(graph, graph->where, NULL, NULL) > ctrl->nparts) + EliminateComponents(ctrl, graph); + + if (!IsBalanced(ctrl, graph, 0.0)) { + ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE); + Greedy_KWayOptimize(ctrl, graph, 10, 0, OMODE_BALANCE); - if (mustfree) - GKfree((void **)(&graph->vwgt), &graph->adjwgt, LTERM); + ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE); + Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 0, OMODE_REFINE); + } + + if (ctrl->contig) + ASSERT(FindPartitionInducedComponents(graph, graph->where, NULL, NULL) == ctrl->nparts); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->UncoarsenTmr)); } -/************************************************************************* -* This function allocates memory for k-way edge refinement -**************************************************************************/ -void AllocateKWayPartitionMemory(CtrlType *ctrl, GraphType *graph, int nparts) +/*************************************************************************/ +/*! 
This function allocates memory for the k-way cut-based refinement */ +/*************************************************************************/ +void AllocateKWayPartitionMemory(ctrl_t *ctrl, graph_t *graph) { - int nvtxs, pad64; - nvtxs = graph->nvtxs; + graph->pwgts = imalloc(ctrl->nparts*graph->ncon, "AllocateKWayPartitionMemory: pwgts"); + graph->where = imalloc(graph->nvtxs, "AllocateKWayPartitionMemory: where"); + graph->bndptr = imalloc(graph->nvtxs, "AllocateKWayPartitionMemory: bndptr"); + graph->bndind = imalloc(graph->nvtxs, "AllocateKWayPartitionMemory: bndind"); - pad64 = (3*nvtxs+nparts)%2; + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + graph->ckrinfo = (ckrinfo_t *)gk_malloc(graph->nvtxs*sizeof(ckrinfo_t), + "AllocateKWayPartitionMemory: ckrinfo"); + break; - graph->rdata = idxmalloc(3*nvtxs+nparts+(sizeof(RInfoType)/sizeof(idxtype))*nvtxs+pad64, "AllocateKWayPartitionMemory: rdata"); - graph->pwgts = graph->rdata; - graph->where = graph->rdata + nparts; - graph->bndptr = graph->rdata + nvtxs + nparts; - graph->bndind = graph->rdata + 2*nvtxs + nparts; - graph->rinfo = (RInfoType *)(graph->rdata + 3*nvtxs+nparts + pad64); + case METIS_OBJTYPE_VOL: + graph->vkrinfo = (vkrinfo_t *)gk_malloc(graph->nvtxs*sizeof(vkrinfo_t), + "AllocateKWayVolPartitionMemory: vkrinfo"); + + /* This is to let the cut-based -minconn and -contig large-scale graph + changes to go through */ + graph->ckrinfo = (ckrinfo_t *)graph->vkrinfo; + break; + + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); + } -/* - if (ctrl->wspace.edegrees != NULL) - GKfree((void **)&ctrl->wspace.edegrees, LTERM); - ctrl->wspace.edegrees = (EDegreeType *)GKmalloc(graph->nedges*sizeof(EDegreeType), "AllocateKWayPartitionMemory: edegrees"); -*/ } -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void 
ComputeKWayPartitionParams(CtrlType *ctrl, GraphType *graph, int nparts) +/*************************************************************************/ +/*! This function computes the initial id/ed for cut-based partitioning */ +/*************************************************************************/ +void ComputeKWayPartitionParams(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, l, nvtxs, nbnd, mincut, me, other; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *pwgts, *where, *bndind, *bndptr; - RInfoType *rinfo, *myrinfo; - EDegreeType *myedegrees; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; + idx_t i, j, k, l, nvtxs, ncon, nparts, nbnd, mincut, me, other; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *pwgts, *where, *bndind, *bndptr; + + nparts = ctrl->nparts; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - pwgts = idxset(nparts, 0, graph->pwgts); + where = graph->where; + pwgts = iset(nparts*ncon, 0, graph->pwgts); bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); - rinfo = graph->rinfo; - + bndptr = iset(nvtxs, -1, graph->bndptr); - /*------------------------------------------------------------ - / Compute now the id/ed degrees - /------------------------------------------------------------*/ - ctrl->wspace.cdegree = 0; nbnd = mincut = 0; - for (i=0; iid = myrinfo->ed = myrinfo->ndegrees = 0; - myrinfo->edegrees = NULL; - - for (j=xadj[i]; jed += adjwgt[j]; + /* Compute pwgts */ + if (ncon == 1) { + for (i=0; i= 0 && where[i] < nparts); + pwgts[where[i]] += vwgt[i]; } - myrinfo->id = graph->adjwgtsum[i] - myrinfo->ed; + } + else { + for (i=0; ied > 0) - mincut += myrinfo->ed; + /* Compute the required info for refinement */ + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + { + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs); + cnbrpoolReset(ctrl); + + for (i=0; 
ickrinfo+i; + + for (j=xadj[i]; jid += adjwgt[j]; + else + myrinfo->ed += adjwgt[j]; + } - if (myrinfo->ed-myrinfo->id >= 0) - BNDInsert(nbnd, bndind, bndptr, i); + /* Time to compute the particular external degrees */ + if (myrinfo->ed > 0) { + mincut += myrinfo->ed; + + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + for (j=xadj[i]; jnnbrs; k++) { + if (mynbrs[k].pid == other) { + mynbrs[k].ed += adjwgt[j]; + break; + } + } + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = other; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; + } + } + } - /* Time to compute the particular external degrees */ - if (myrinfo->ed > 0) { - myedegrees = myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[i+1]-xadj[i]; + ASSERT(myrinfo->nnbrs <= xadj[i+1]-xadj[i]); - for (j=xadj[i]; jndegrees; k++) { - if (myedegrees[k].pid == other) { - myedegrees[k].ed += adjwgt[j]; - break; - } + /* Only ed-id>=0 nodes are considered to be in the boundary */ + if (myrinfo->ed-myrinfo->id >= 0) + BNDInsert(nbnd, bndind, bndptr, i); } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = other; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; + else { + myrinfo->inbr = -1; } } + + graph->mincut = mincut/2; + graph->nbnd = nbnd; + } + ASSERT(CheckBnd2(graph)); + break; - ASSERT(myrinfo->ndegrees <= xadj[i+1]-xadj[i]); - } + case METIS_OBJTYPE_VOL: + { + vkrinfo_t *myrinfo; + vnbr_t *mynbrs; + + memset(graph->vkrinfo, 0, sizeof(vkrinfo_t)*nvtxs); + vnbrpoolReset(ctrl); + + /* Compute now the id/ed degrees */ + for (i=0; ivkrinfo+i; + + for (j=xadj[i]; jnid++; + else + myrinfo->ned++; + } + + /* Time to compute the particular external degrees */ + if (myrinfo->ned > 0) { + mincut += myrinfo->ned; + + myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + for (j=xadj[i]; jnnbrs; k++) { + if (mynbrs[k].pid == other) { + mynbrs[k].ned++; + 
break; + } + } + if (k == myrinfo->nnbrs) { + mynbrs[k].gv = 0; + mynbrs[k].pid = other; + mynbrs[k].ned = 1; + myrinfo->nnbrs++; + } + } + } + ASSERT(myrinfo->nnbrs <= xadj[i+1]-xadj[i]); + } + else { + myrinfo->inbr = -1; + } + } + graph->mincut = mincut/2; + + ComputeKWayVolGains(ctrl, graph); + } + ASSERT(graph->minvol == ComputeVolume(graph, graph->where)); + break; + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); } - graph->mincut = mincut/2; - graph->nbnd = nbnd; - } - -/************************************************************************* -* This function projects a partition, and at the same time computes the -* parameters for refinement. -**************************************************************************/ -void ProjectKWayPartition(CtrlType *ctrl, GraphType *graph, int nparts) +/*************************************************************************/ +/*! This function projects a partition, and at the same time computes the + parameters for refinement. 
*/ +/*************************************************************************/ +void ProjectKWayPartition(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, nvtxs, nbnd, me, other, istart, iend, ndegrees; - idxtype *xadj, *adjncy, *adjwgt, *adjwgtsum; - idxtype *cmap, *where, *bndptr, *bndind; - idxtype *cwhere; - GraphType *cgraph; - RInfoType *crinfo, *rinfo, *myrinfo; - EDegreeType *myedegrees; - idxtype *htable; + idx_t i, j, k, nvtxs, nbnd, nparts, me, other, istart, iend, tid, ted; + idx_t *xadj, *adjncy, *adjwgt; + idx_t *cmap, *where, *bndptr, *bndind, *cwhere, *htable; + graph_t *cgraph; + int dropedges; + + WCOREPUSH; + + dropedges = ctrl->dropedges; + + nparts = ctrl->nparts; cgraph = graph->coarser; cwhere = cgraph->where; - crinfo = cgraph->rinfo; - nvtxs = graph->nvtxs; - cmap = graph->cmap; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; + if (ctrl->objtype == METIS_OBJTYPE_CUT) { + ASSERT(CheckBnd2(cgraph)); + } + else { + ASSERT(cgraph->minvol == ComputeVolume(cgraph, cgraph->where)); + } - AllocateKWayPartitionMemory(ctrl, graph, nparts); - where = graph->where; - rinfo = graph->rinfo; - bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); + /* free the coarse graph's structure (reduce maxmem) */ + FreeSData(cgraph); - /* Go through and project partition and compute id/ed for the nodes */ - for (i=0; invtxs; + cmap = graph->cmap; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; - htable = idxset(nparts, -1, idxwspacemalloc(ctrl, nparts)); + AllocateKWayPartitionMemory(ctrl, graph); - ctrl->wspace.cdegree = 0; - for (nbnd=0, i=0; iwhere; + bndind = graph->bndind; + bndptr = iset(nvtxs, -1, graph->bndptr); + + htable = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + + /* Compute the required info for refinement */ + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + { + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + /* go through and project partition and 
compute id/ed for the nodes */ + for (i=0; ickrinfo[k].ed); /* For optimization */ + } - myrinfo = rinfo+i; - myrinfo->id = myrinfo->ed = myrinfo->ndegrees = 0; - myrinfo->edegrees = NULL; + memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs); + cnbrpoolReset(ctrl); - myrinfo->id = adjwgtsum[i]; + for (nbnd=0, i=0; i 0) { /* If it is an interface node. Note cmap[i] = crinfo[cmap[i]].ed */ - istart = xadj[i]; - iend = xadj[i+1]; + myrinfo = graph->ckrinfo+i; - myedegrees = myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += iend-istart; + if (cmap[i] == 0) { /* Interior node. Note that cmap[i] = crinfo[cmap[i]].ed */ + for (tid=0, j=istart; jed += adjwgt[j]; - if ((k = htable[other]) == -1) { - htable[other] = ndegrees; - myedegrees[ndegrees].pid = other; - myedegrees[ndegrees++].ed = adjwgt[j]; + myrinfo->id = tid; + myrinfo->inbr = -1; } - else { - myedegrees[k].ed += adjwgt[j]; + else { /* Potentially an interface node */ + myrinfo->inbr = cnbrpoolGetNext(ctrl, iend-istart); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + me = where[i]; + for (tid=0, ted=0, j=istart; jnnbrs; + mynbrs[myrinfo->nnbrs].pid = other; + mynbrs[myrinfo->nnbrs++].ed = adjwgt[j]; + } + else { + mynbrs[k].ed += adjwgt[j]; + } + } + } + myrinfo->id = tid; + myrinfo->ed = ted; + + /* Remove space for edegrees if it was interior */ + if (ted == 0) { + ctrl->nbrpoolcpos -= gk_min(nparts, iend-istart); + myrinfo->inbr = -1; + } + else { + if (ted-tid >= 0) + BNDInsert(nbnd, bndind, bndptr, i); + + for (j=0; jnnbrs; j++) + htable[mynbrs[j].pid] = -1; + } } } + + graph->nbnd = nbnd; } - myrinfo->id -= myrinfo->ed; + ASSERT(CheckBnd2(graph)); + break; - /* Remove space for edegrees if it was interior */ - if (myrinfo->ed == 0) { - myrinfo->edegrees = NULL; - ctrl->wspace.cdegree -= iend-istart; - } - else { - if (myrinfo->ed-myrinfo->id >= 0) - BNDInsert(nbnd, bndind, bndptr, i); + case METIS_OBJTYPE_VOL: + { + vkrinfo_t *myrinfo; + vnbr_t *mynbrs; - 
myrinfo->ndegrees = ndegrees; + /* go through and project partition and compute id/ed for the nodes */ + for (i=0; ivkrinfo[k].ned); /* For optimization */ + } + + memset(graph->vkrinfo, 0, sizeof(vkrinfo_t)*nvtxs); + vnbrpoolReset(ctrl); + + for (i=0; ivkrinfo+i; + + if (cmap[i] == 0) { /* Note that cmap[i] = crinfo[cmap[i]].ed */ + myrinfo->nid = iend-istart; + myrinfo->inbr = -1; + } + else { /* Potentially an interface node */ + myrinfo->inbr = vnbrpoolGetNext(ctrl, iend-istart); + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + me = where[i]; + for (tid=0, ted=0, j=istart; jnnbrs; + mynbrs[myrinfo->nnbrs].gv = 0; + mynbrs[myrinfo->nnbrs].pid = other; + mynbrs[myrinfo->nnbrs++].ned = 1; + } + else { + mynbrs[k].ned++; + } + } + } + myrinfo->nid = tid; + myrinfo->ned = ted; + + /* Remove space for edegrees if it was interior */ + if (ted == 0) { + ctrl->nbrpoolcpos -= gk_min(nparts, iend-istart); + myrinfo->inbr = -1; + } + else { + for (j=0; jnnbrs; j++) + htable[mynbrs[j].pid] = -1; + } + } + } + + ComputeKWayVolGains(ctrl, graph); - for (j=0; jminvol == ComputeVolume(graph, graph->where)); } - } + break; + + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); } - idxcopy(nparts, cgraph->pwgts, graph->pwgts); - graph->mincut = cgraph->mincut; - graph->nbnd = nbnd; + graph->mincut = (dropedges ? ComputeCut(graph, where) : cgraph->mincut); + icopy(nparts*graph->ncon, cgraph->pwgts, graph->pwgts); - FreeGraph(graph->coarser); - graph->coarser = NULL; + FreeGraph(&graph->coarser); - idxwspacefree(ctrl, nparts); + WCOREPOP; +} - ASSERT(CheckBnd2(graph)); -} +/*************************************************************************/ +/*! This function computes the boundary definition for balancing. 
*/ +/*************************************************************************/ +void ComputeKWayBoundary(ctrl_t *ctrl, graph_t *graph, idx_t bndtype) +{ + idx_t i, nvtxs, nbnd; + idx_t *bndind, *bndptr; + nvtxs = graph->nvtxs; + bndind = graph->bndind; + bndptr = iset(nvtxs, -1, graph->bndptr); + nbnd = 0; -/************************************************************************* -* This function checks if the partition weights are within the balance -* contraints -**************************************************************************/ -int IsBalanced(idxtype *pwgts, int nparts, float *tpwgts, float ubfactor) -{ - int i, j, tvwgt; + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + /* Compute the boundary */ + if (bndtype == BNDTYPE_REFINE) { + for (i=0; ickrinfo[i].ed > 0 && graph->ckrinfo[i].ed-graph->ckrinfo[i].id >= 0) + BNDInsert(nbnd, bndind, bndptr, i); + } + } + else { /* BNDTYPE_BALANCE */ + for (i=0; ickrinfo[i].ed > 0) + BNDInsert(nbnd, bndind, bndptr, i); + } + } + break; - tvwgt = idxsum(nparts, pwgts); - for (i=0; i tpwgts[i]*tvwgt*(ubfactor+0.005)) - return 0; + case METIS_OBJTYPE_VOL: + /* Compute the boundary */ + if (bndtype == BNDTYPE_REFINE) { + for (i=0; ivkrinfo[i].gv >= 0) + BNDInsert(nbnd, bndind, bndptr, i); + } + } + else { /* BNDTYPE_BALANCE */ + for (i=0; ivkrinfo[i].ned > 0) + BNDInsert(nbnd, bndind, bndptr, i); + } + } + break; + + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); } - return 1; + graph->nbnd = nbnd; } -/************************************************************************* -* This function computes the boundary definition for balancing -**************************************************************************/ -void ComputeKWayBoundary(CtrlType *ctrl, GraphType *graph, int nparts) +/*************************************************************************/ +/*! 
This function computes the initial gains in the communication volume */ +/*************************************************************************/ +void ComputeKWayVolGains(ctrl_t *ctrl, graph_t *graph) { - int i, nvtxs, nbnd; - idxtype *bndind, *bndptr; + idx_t i, ii, j, k, l, nvtxs, nparts, me, other, pid; + idx_t *xadj, *vsize, *adjncy, *adjwgt, *where, + *bndind, *bndptr, *ophtable; + vkrinfo_t *myrinfo, *orinfo; + vnbr_t *mynbrs, *onbrs; + + WCOREPUSH; + + nparts = ctrl->nparts; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vsize = graph->vsize; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; - nvtxs = graph->nvtxs; + where = graph->where; bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); + bndptr = iset(nvtxs, -1, graph->bndptr); + ophtable = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); - /*------------------------------------------------------------ - / Compute the new boundary - /------------------------------------------------------------*/ - nbnd = 0; + /* Compute the volume gains */ + graph->minvol = graph->nbnd = 0; for (i=0; irinfo[i].ed-graph->rinfo[i].id >= 0) - BNDInsert(nbnd, bndind, bndptr, i); - } + myrinfo = graph->vkrinfo+i; + myrinfo->gv = IDX_MIN; - graph->nbnd = nbnd; -} + if (myrinfo->nnbrs > 0) { + me = where[i]; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; -/************************************************************************* -* This function computes the boundary definition for balancing -**************************************************************************/ -void ComputeKWayBalanceBoundary(CtrlType *ctrl, GraphType *graph, int nparts) -{ - int i, nvtxs, nbnd; - idxtype *bndind, *bndptr; + graph->minvol += myrinfo->nnbrs*vsize[i]; - nvtxs = graph->nvtxs; - bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); + for (j=xadj[i]; jvkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + for (k=0; knnbrs; k++) + ophtable[onbrs[k].pid] = k; + ophtable[other] = 1; /* this is to simplify 
coding */ + + if (me == other) { + /* Find which domains 'i' is connected to but 'ii' is not + and update their gain */ + for (k=0; knnbrs; k++) { + if (ophtable[mynbrs[k].pid] == -1) + mynbrs[k].gv -= vsize[ii]; + } + } + else { + ASSERT(ophtable[me] != -1); + + if (onbrs[ophtable[me]].ned == 1) { + /* I'm the only connection of 'ii' in 'me' */ + /* Increase the gains for all the common domains between 'i' and 'ii' */ + for (k=0; knnbrs; k++) { + if (ophtable[mynbrs[k].pid] != -1) + mynbrs[k].gv += vsize[ii]; + } + } + else { + /* Find which domains 'i' is connected to and 'ii' is not + and update their gain */ + for (k=0; knnbrs; k++) { + if (ophtable[mynbrs[k].pid] == -1) + mynbrs[k].gv -= vsize[ii]; + } + } + } + /* Reset the marker vector */ + for (k=0; knnbrs; k++) + ophtable[onbrs[k].pid] = -1; + ophtable[other] = -1; + } - /*------------------------------------------------------------ - / Compute the new boundary - /------------------------------------------------------------*/ - nbnd = 0; - for (i=0; irinfo[i].ed > 0) - BNDInsert(nbnd, bndind, bndptr, i); + /* Compute the max vgain */ + for (k=0; knnbrs; k++) { + if (mynbrs[k].gv > myrinfo->gv) + myrinfo->gv = mynbrs[k].gv; + } + + /* Add the extra gain due to id == 0 */ + if (myrinfo->ned > 0 && myrinfo->nid == 0) + myrinfo->gv += vsize[i]; + } + + if (myrinfo->gv >= 0) + BNDInsert(graph->nbnd, bndind, bndptr, i); } - graph->nbnd = nbnd; + WCOREPOP; +} + + +/*************************************************************************/ +/*! 
This function checks if the partition weights are within the balance +constraints */ +/*************************************************************************/ +int IsBalanced(ctrl_t *ctrl, graph_t *graph, real_t ffactor) +{ + return + (ComputeLoadImbalanceDiff(graph, ctrl->nparts, ctrl->pijbm, ctrl->ubfactors) + <= ffactor); } diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayvolfm.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayvolfm.c deleted file mode 100644 index 86566cb3..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayvolfm.c +++ /dev/null @@ -1,1775 +0,0 @@ -/* - * kwayvolfm.c - * - * This file contains code that implements the multilevel k-way refinement - * - * Started 7/8/98 - * George - * - * $Id: kwayvolfm.c,v 1.2 2003/07/31 06:14:01 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Random_KWayVolRefine(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, - float ubfactor, int npasses, int ffactor) -{ - int i, ii, iii, j, jj, k, kk, l, u, pass, nvtxs, nmoves, tvwgt, myndegrees, xgain; - int from, me, to, oldcut, oldvol, vwgt; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *itpwgts, *updind, *marker, *phtable; - VEDegreeType *myedegrees; - VRInfoType *myrinfo; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - bndptr = graph->bndptr; - bndind = graph->bndind; - - where = graph->where; - pwgts = graph->pwgts; - - /* Setup the weight intervals of the various subdomains */ - minwgt = idxwspacemalloc(ctrl, nparts); - maxwgt = idxwspacemalloc(ctrl, nparts); - itpwgts = idxwspacemalloc(ctrl, nparts); - tvwgt = idxsum(nparts, pwgts); - ASSERT(tvwgt == idxsum(nvtxs, 
graph->vwgt)); - - updind = idxmalloc(nvtxs, "Random_KWayVolRefine: updind"); - marker = idxsmalloc(nvtxs, 0, "Random_KWayVolRefine: marker"); - phtable = idxsmalloc(nparts, -1, "Random_KWayVolRefine: phtable"); - - for (i=0; idbglvl, DBG_REFINE, - printf("VolPart: [%5d %5d]-[%5d %5d], Balance: %3.2f, Nv-Nb[%5d %5d]. Cut: %5d, Vol: %5d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd, - graph->mincut, graph->minvol)); - - for (pass=0; passmincut); - - oldcut = graph->mincut; - oldvol = graph->minvol; - - RandomPermute(graph->nbnd, perm, 1); - for (nmoves=iii=0; iiinbnd; iii++) { - ii = perm[iii]; - if (ii >= graph->nbnd) - continue; - i = bndind[ii]; - myrinfo = graph->vrinfo+i; - - if (myrinfo->gv >= 0) { /* Total volume gain is too high */ - from = where[i]; - vwgt = graph->vwgt[i]; - - if (myrinfo->id > 0 && pwgts[from]-vwgt < minwgt[from]) - continue; /* This cannot be moved! */ - - xgain = (myrinfo->id == 0 && myrinfo->ed > 0 ? 
graph->vsize[i] : 0); - - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - - for (k=0; k= 0) - break; - } - if (k == myndegrees) - continue; /* break out if you did not find a candidate */ - - for (j=k+1; j maxwgt[to]) - continue; - if (myedegrees[j].gv > myedegrees[k].gv || - (myedegrees[j].gv == myedegrees[k].gv && myedegrees[j].ed > myedegrees[k].ed) || - (myedegrees[j].gv == myedegrees[k].gv && myedegrees[j].ed == myedegrees[k].ed && - itpwgts[myedegrees[k].pid]*pwgts[to] < itpwgts[to]*pwgts[myedegrees[k].pid])) - k = j; - } - - to = myedegrees[k].pid; - - j = 0; - if (xgain+myedegrees[k].gv > 0 || myedegrees[k].ed-myrinfo->id > 0) - j = 1; - else if (myedegrees[k].ed-myrinfo->id == 0) { - if ((iii&5) == 0 || pwgts[from] >= maxwgt[from] || itpwgts[from]*(pwgts[to]+vwgt) < itpwgts[to]*pwgts[from]) - j = 1; - } - if (j == 0) - continue; - - /*===================================================================== - * If we got here, we can now move the vertex from 'from' to 'to' - *======================================================================*/ - INC_DEC(pwgts[to], pwgts[from], vwgt); - graph->mincut -= myedegrees[k].ed-myrinfo->id; - graph->minvol -= (xgain+myedegrees[k].gv); - where[i] = to; - - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d from %3d to %3d. Gain: [%4d %4d]. Cut: %6d, Vol: %6d\n", - i, from, to, xgain+myedegrees[k].gv, myedegrees[k].ed-myrinfo->id, graph->mincut, graph->minvol)); - - KWayVolUpdate(ctrl, graph, i, from, to, marker, phtable, updind); - - nmoves++; - - /* CheckVolKWayPartitionParams(ctrl, graph, nparts); */ - } - } - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. 
Nmoves: %5d, Cut: %6d, Vol: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves, graph->mincut, - graph->minvol)); - - if (graph->minvol == oldvol && graph->mincut == oldcut) - break; - } - - GKfree((void **)(&marker), &updind, &phtable, LTERM); - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); -} - - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Random_KWayVolRefineMConn(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, - float ubfactor, int npasses, int ffactor) -{ - int i, ii, iii, j, jj, k, kk, l, u, pass, nvtxs, nmoves, tvwgt, myndegrees, xgain; - int from, me, to, oldcut, oldvol, vwgt, nadd, maxndoms; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *itpwgts, *updind, *marker, *phtable; - idxtype *pmat, *pmatptr, *ndoms; - VEDegreeType *myedegrees; - VRInfoType *myrinfo; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - bndptr = graph->bndptr; - bndind = graph->bndind; - - where = graph->where; - pwgts = graph->pwgts; - - /* Setup the weight intervals of the various subdomains */ - minwgt = idxwspacemalloc(ctrl, nparts); - maxwgt = idxwspacemalloc(ctrl, nparts); - itpwgts = idxwspacemalloc(ctrl, nparts); - tvwgt = idxsum(nparts, pwgts); - ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt)); - - updind = idxmalloc(nvtxs, "Random_KWayVolRefine: updind"); - marker = idxsmalloc(nvtxs, 0, "Random_KWayVolRefine: marker"); - phtable = idxsmalloc(nparts, -1, "Random_KWayVolRefine: phtable"); - - pmat = ctrl->wspace.pmat; - ndoms = idxwspacemalloc(ctrl, nparts); - - ComputeVolSubDomainGraph(graph, nparts, pmat, ndoms); - - for (i=0; idbglvl, 
DBG_REFINE, - printf("VolPart: [%5d %5d]-[%5d %5d], Balance: %3.2f, Nv-Nb[%5d %5d]. Cut: %5d, Vol: %5d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd, - graph->mincut, graph->minvol)); - - for (pass=0; passmincut); - - maxndoms = ndoms[idxamax(nparts, ndoms)]; - - oldcut = graph->mincut; - oldvol = graph->minvol; - - RandomPermute(graph->nbnd, perm, 1); - for (nmoves=iii=0; iiinbnd; iii++) { - ii = perm[iii]; - if (ii >= graph->nbnd) - continue; - i = bndind[ii]; - myrinfo = graph->vrinfo+i; - - if (myrinfo->gv >= 0) { /* Total volume gain is too high */ - from = where[i]; - vwgt = graph->vwgt[i]; - - if (myrinfo->id > 0 && pwgts[from]-vwgt < minwgt[from]) - continue; /* This cannot be moved! */ - - xgain = (myrinfo->id == 0 && myrinfo->ed > 0 ? graph->vsize[i] : 0); - - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - - /* Determine the valid domains */ - for (j=0; j maxndoms-1) { - phtable[to] = 0; - nadd = maxndoms; - break; - } - nadd++; - } - } - if (ndoms[to]+nadd > maxndoms) - phtable[to] = 0; - if (nadd == 0) - phtable[to] = 2; - } - - for (k=0; k= 0) - break; - } - if (k == myndegrees) - continue; /* break out if you did not find a candidate */ - - for (j=k+1; j maxwgt[to]) - continue; - if (myedegrees[j].gv > myedegrees[k].gv || - (myedegrees[j].gv == myedegrees[k].gv && myedegrees[j].ed > myedegrees[k].ed) || - (myedegrees[j].gv == myedegrees[k].gv && myedegrees[j].ed == myedegrees[k].ed && - itpwgts[myedegrees[k].pid]*pwgts[to] < itpwgts[to]*pwgts[myedegrees[k].pid])) - k = j; - } - - to = myedegrees[k].pid; - - j = 0; - if (xgain+myedegrees[k].gv > 0 || myedegrees[k].ed-myrinfo->id > 0) - j = 1; - else if (myedegrees[k].ed-myrinfo->id == 0) { - if ((iii&5) == 0 || phtable[myedegrees[k].pid] == 2 || pwgts[from] >= maxwgt[from] || itpwgts[from]*(pwgts[to]+vwgt) < itpwgts[to]*pwgts[from]) - j = 1; - } - - if (j == 0) - 
continue; - - for (j=0; jmincut -= myedegrees[k].ed-myrinfo->id; - graph->minvol -= (xgain+myedegrees[k].gv); - where[i] = to; - - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d from %3d to %3d. Gain: [%4d %4d]. Cut: %6d, Vol: %6d\n", - i, from, to, xgain+myedegrees[k].gv, myedegrees[k].ed-myrinfo->id, graph->mincut, graph->minvol)); - - /* Update pmat to reflect the move of 'i' */ - pmat[from*nparts+to] += (myrinfo->id-myedegrees[k].ed); - pmat[to*nparts+from] += (myrinfo->id-myedegrees[k].ed); - if (pmat[from*nparts+to] == 0) { - ndoms[from]--; - if (ndoms[from]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - if (pmat[to*nparts+from] == 0) { - ndoms[to]--; - if (ndoms[to]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - - for (j=xadj[i]; j maxndoms) { - IFSET(ctrl->dbglvl, DBG_REFINE, printf("You just increased the maxndoms: %d %d\n", ndoms[me], maxndoms)); - maxndoms = ndoms[me]; - } - } - if (pmat[to*nparts+me] == 0) { - ndoms[to]++; - if (ndoms[to] > maxndoms) { - IFSET(ctrl->dbglvl, DBG_REFINE, printf("You just increased the maxndoms: %d %d\n", ndoms[to], maxndoms)); - maxndoms = ndoms[to]; - } - } - pmat[me*nparts+to] += adjwgt[j]; - pmat[to*nparts+me] += adjwgt[j]; - } - } - - KWayVolUpdate(ctrl, graph, i, from, to, marker, phtable, updind); - - nmoves++; - - /* CheckVolKWayPartitionParams(ctrl, graph, nparts); */ - } - } - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. 
Nmoves: %5d, Cut: %6d, Vol: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves, graph->mincut, - graph->minvol)); - - if (graph->minvol == oldvol && graph->mincut == oldcut) - break; - } - - GKfree((void **)(&marker), &updind, &phtable, LTERM); - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); -} - - - - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Greedy_KWayVolBalance(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, - float ubfactor, int npasses) -{ - int i, ii, iii, j, jj, k, kk, l, u, pass, nvtxs, nmoves, tvwgt, myndegrees, xgain; - int from, me, to, vwgt, gain; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *pwgts, *perm, *moved, *bndptr, *bndind, *minwgt, *maxwgt, *itpwgts, *updind, *marker, *phtable; - VEDegreeType *myedegrees; - VRInfoType *myrinfo; - PQueueType queue; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - bndptr = graph->bndptr; - bndind = graph->bndind; - - where = graph->where; - pwgts = graph->pwgts; - - /* Setup the weight intervals of the various subdomains */ - minwgt = idxwspacemalloc(ctrl, nparts); - maxwgt = idxwspacemalloc(ctrl, nparts); - itpwgts = idxwspacemalloc(ctrl, nparts); - tvwgt = idxsum(nparts, pwgts); - ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt)); - - updind = idxmalloc(nvtxs, "Random_KWayVolRefine: updind"); - marker = idxsmalloc(nvtxs, 0, "Random_KWayVolRefine: marker"); - phtable = idxsmalloc(nparts, -1, "Random_KWayVolRefine: phtable"); - - for (i=0; iadjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]); - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("VolPart: [%5d %5d]-[%5d %5d], Balance: %3.2f, Nv-Nb[%5d %5d]. 
Cut: %5d, Vol: %5d [B]\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd, - graph->mincut, graph->minvol)); - - - for (pass=0; passmincut); - /* Check to see if things are out of balance, given the tolerance */ - for (i=0; i maxwgt[i]) - break; - } - if (i == nparts) /* Things are balanced. Return right away */ - break; - - PQueueReset(&queue); - idxset(nvtxs, -1, moved); - - RandomPermute(graph->nbnd, perm, 1); - for (ii=0; iinbnd; ii++) { - i = bndind[perm[ii]]; - PQueueInsert(&queue, i, graph->vrinfo[i].gv); - moved[i] = 2; - } - - for (nmoves=0;;) { - if ((i = PQueueGetMax(&queue)) == -1) - break; - moved[i] = 1; - - myrinfo = graph->vrinfo+i; - from = where[i]; - vwgt = graph->vwgt[i]; - - if (pwgts[from]-vwgt < minwgt[from]) - continue; /* This cannot be moved! */ - - xgain = (myrinfo->id == 0 && myrinfo->ed > 0 ? graph->vsize[i] : 0); - - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - - for (k=0; k minwgt[to] && - (xgain+myedegrees[k].gv < 0 || - (xgain+myedegrees[k].gv == 0 && myedegrees[k].ed-myrinfo->id < 0)) - ) - continue; - - - /*===================================================================== - * If we got here, we can now move the vertex from 'from' to 'to' - *======================================================================*/ - INC_DEC(pwgts[to], pwgts[from], vwgt); - graph->mincut -= myedegrees[k].ed-myrinfo->id; - graph->minvol -= (xgain+myedegrees[k].gv); - where[i] = to; - - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d from %3d to %3d. Gain: [%4d %4d]. 
Cut: %6d, Vol: %6d\n", - i, from, to, xgain+myedegrees[k].gv, myedegrees[k].ed-myrinfo->id, graph->mincut, graph->minvol)); - - KWayVolUpdate(ctrl, graph, i, from, to, marker, phtable, updind); - - nmoves++; - - /*CheckVolKWayPartitionParams(ctrl, graph, nparts); */ - } - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. Nmoves: %5d, Cut: %6d, Vol: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves, graph->mincut, - graph->minvol)); - - } - - GKfree((void **)(&marker), &updind, &phtable, LTERM); - - PQueueFree(ctrl, &queue); - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Greedy_KWayVolBalanceMConn(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, - float ubfactor, int npasses) -{ - int i, ii, iii, j, jj, k, kk, l, u, pass, nvtxs, nmoves, tvwgt, myndegrees, xgain; - int from, me, to, vwgt, gain, maxndoms, nadd; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *pwgts, *perm, *moved, *bndptr, *bndind, *minwgt, *maxwgt, *itpwgts, *updind, *marker, *phtable; - idxtype *pmat, *pmatptr, *ndoms; - VEDegreeType *myedegrees; - VRInfoType *myrinfo; - PQueueType queue; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - bndptr = graph->bndptr; - bndind = graph->bndind; - - where = graph->where; - pwgts = graph->pwgts; - - /* Setup the weight intervals of the various subdomains */ - minwgt = idxwspacemalloc(ctrl, nparts); - maxwgt = idxwspacemalloc(ctrl, nparts); - itpwgts = idxwspacemalloc(ctrl, nparts); - tvwgt = idxsum(nparts, pwgts); - ASSERT(tvwgt == idxsum(nvtxs, 
graph->vwgt)); - - updind = idxmalloc(nvtxs, "Random_KWayVolRefine: updind"); - marker = idxsmalloc(nvtxs, 0, "Random_KWayVolRefine: marker"); - phtable = idxsmalloc(nparts, -1, "Random_KWayVolRefine: phtable"); - - pmat = ctrl->wspace.pmat; - ndoms = idxwspacemalloc(ctrl, nparts); - - ComputeVolSubDomainGraph(graph, nparts, pmat, ndoms); - - for (i=0; iadjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]); - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("VolPart: [%5d %5d]-[%5d %5d], Balance: %3.2f, Nv-Nb[%5d %5d]. Cut: %5d, Vol: %5d [B]\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd, - graph->mincut, graph->minvol)); - - - for (pass=0; passmincut); - /* Check to see if things are out of balance, given the tolerance */ - for (i=0; i maxwgt[i]) - break; - } - if (i == nparts) /* Things are balanced. Return right away */ - break; - - PQueueReset(&queue); - idxset(nvtxs, -1, moved); - - RandomPermute(graph->nbnd, perm, 1); - for (ii=0; iinbnd; ii++) { - i = bndind[perm[ii]]; - PQueueInsert(&queue, i, graph->vrinfo[i].gv); - moved[i] = 2; - } - - maxndoms = ndoms[idxamax(nparts, ndoms)]; - - for (nmoves=0;;) { - if ((i = PQueueGetMax(&queue)) == -1) - break; - moved[i] = 1; - - myrinfo = graph->vrinfo+i; - from = where[i]; - vwgt = graph->vwgt[i]; - - if (pwgts[from]-vwgt < minwgt[from]) - continue; /* This cannot be moved! */ - - xgain = (myrinfo->id == 0 && myrinfo->ed > 0 ? 
graph->vsize[i] : 0); - - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - - /* Determine the valid domains */ - for (j=0; j maxndoms-1) { - phtable[to] = 0; - nadd = maxndoms; - break; - } - nadd++; - } - } - if (ndoms[to]+nadd > maxndoms) - phtable[to] = 0; - } - - for (k=0; k minwgt[to] && - (xgain+myedegrees[k].gv < 0 || - (xgain+myedegrees[k].gv == 0 && myedegrees[k].ed-myrinfo->id < 0)) - ) - continue; - - - /*===================================================================== - * If we got here, we can now move the vertex from 'from' to 'to' - *======================================================================*/ - INC_DEC(pwgts[to], pwgts[from], vwgt); - graph->mincut -= myedegrees[k].ed-myrinfo->id; - graph->minvol -= (xgain+myedegrees[k].gv); - where[i] = to; - - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d from %3d to %3d. Gain: [%4d %4d]. Cut: %6d, Vol: %6d\n", - i, from, to, xgain+myedegrees[k].gv, myedegrees[k].ed-myrinfo->id, graph->mincut, graph->minvol)); - - /* Update pmat to reflect the move of 'i' */ - pmat[from*nparts+to] += (myrinfo->id-myedegrees[k].ed); - pmat[to*nparts+from] += (myrinfo->id-myedegrees[k].ed); - if (pmat[from*nparts+to] == 0) { - ndoms[from]--; - if (ndoms[from]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - if (pmat[to*nparts+from] == 0) { - ndoms[to]--; - if (ndoms[to]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - - for (j=xadj[i]; j maxndoms) { - IFSET(ctrl->dbglvl, DBG_REFINE, printf("You just increased the maxndoms: %d %d\n", ndoms[me], maxndoms)); - maxndoms = ndoms[me]; - } - } - if (pmat[to*nparts+me] == 0) { - ndoms[to]++; - if (ndoms[to] > maxndoms) { - IFSET(ctrl->dbglvl, DBG_REFINE, printf("You just increased the maxndoms: %d %d\n", ndoms[to], maxndoms)); - maxndoms = ndoms[to]; - } - } - pmat[me*nparts+to] += adjwgt[j]; - pmat[to*nparts+me] += adjwgt[j]; - } - } - - KWayVolUpdate(ctrl, graph, i, from, to, marker, phtable, updind); - - 
nmoves++; - - /*CheckVolKWayPartitionParams(ctrl, graph, nparts); */ - } - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. Nmoves: %5d, Cut: %6d, Vol: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves, graph->mincut, - graph->minvol)); - - } - - GKfree((void **)(&marker), &updind, &phtable, LTERM); - - PQueueFree(ctrl, &queue); - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - - -/************************************************************************* -* This function updates the edge and volume gains as a result of moving -* v from 'from' to 'to'. -* The working arrays marker and phtable are assumed to be initialized to -* -1, and they left to -1 upon return -**************************************************************************/ -void KWayVolUpdate(CtrlType *ctrl, GraphType *graph, int v, int from, int to, - idxtype *marker, idxtype *phtable, idxtype *updind) -{ - int ii, iii, j, jj, k, kk, l, u, nupd, other, me, myidx; - idxtype *xadj, *vsize, *adjncy, *adjwgt, *where; - VEDegreeType *myedegrees, *oedegrees; - VRInfoType *myrinfo, *orinfo; - - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - vsize = graph->vsize; - where = graph->where; - - myrinfo = graph->vrinfo+v; - myedegrees = myrinfo->edegrees; - - - /*====================================================================== - * Remove the contributions on the gain made by 'v'. 
- *=====================================================================*/ - for (k=0; kndegrees; k++) - phtable[myedegrees[k].pid] = k; - phtable[from] = k; - - myidx = phtable[to]; /* Keep track of the index in myedegrees of the 'to' domain */ - - for (j=xadj[v]; jvrinfo+ii; - oedegrees = orinfo->edegrees; - - if (other == from) { - for (k=0; kndegrees; k++) { - if (phtable[oedegrees[k].pid] == -1) - oedegrees[k].gv += vsize[v]; - } - } - else { - ASSERT(phtable[other] != -1); - - if (myedegrees[phtable[other]].ned > 1) { - for (k=0; kndegrees; k++) { - if (phtable[oedegrees[k].pid] == -1) - oedegrees[k].gv += vsize[v]; - } - } - else { /* There is only one connection */ - for (k=0; kndegrees; k++) { - if (phtable[oedegrees[k].pid] != -1) - oedegrees[k].gv -= vsize[v]; - } - } - } - } - - for (k=0; kndegrees; k++) - phtable[myedegrees[k].pid] = -1; - phtable[from] = -1; - - - /*====================================================================== - * Update the id/ed of vertex 'v' - *=====================================================================*/ - myrinfo->ed += myrinfo->id-myedegrees[myidx].ed; - SWAP(myrinfo->id, myedegrees[myidx].ed, j); - SWAP(myrinfo->nid, myedegrees[myidx].ned, j); - if (myedegrees[myidx].ed == 0) - myedegrees[myidx] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[myidx].pid = from; - - /*====================================================================== - * Update the degrees of adjacent vertices and their volume gains - *=====================================================================*/ - marker[v] = 1; - updind[0] = v; - nupd = 1; - for (j=xadj[v]; jvrinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.vedegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; - - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - myrinfo->nid--; - } - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - 
myrinfo->nid++; - } - - /* Remove the edgeweight from the 'pid == from' entry of the vertex */ - if (me != from) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == from) { - if (myedegrees[k].ned == 1) { - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - marker[ii] = 1; /* You do a complete .gv calculation */ - - /* All vertices adjacent to 'ii' need to be updated */ - for (jj=xadj[ii]; jjvrinfo+u; - oedegrees = orinfo->edegrees; - - for (kk=0; kkndegrees; kk++) { - if (oedegrees[kk].pid == from) { - oedegrees[kk].gv -= vsize[ii]; - break; - } - } - } - } - else { - myedegrees[k].ed -= adjwgt[j]; - myedegrees[k].ned--; - - /* Update the gv due to single 'ii' connection to 'from' */ - if (myedegrees[k].ned == 1) { - /* find the vertex 'u' that 'ii' was connected into 'from' */ - for (jj=xadj[ii]; jjvrinfo+u; - oedegrees = orinfo->edegrees; - - if (other == from) { - for (kk=0; kkndegrees; kk++) - oedegrees[kk].gv += vsize[ii]; - break; - } - } - } - } - - break; - } - } - } - - /* Add the edgeweight to the 'pid == to' entry of the vertex */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += adjwgt[j]; - myedegrees[k].ned++; - - /* Update the gv due to non-single 'ii' connection to 'to' */ - if (myedegrees[k].ned == 2) { - /* find the vertex 'u' that 'ii' was connected into 'to' */ - for (jj=xadj[ii]; jjvrinfo+u; - oedegrees = orinfo->edegrees; - - if (u != v && other == to) { - for (kk=0; kkndegrees; kk++) - oedegrees[kk].gv -= vsize[ii]; - break; - } - } - } - break; - } - } - - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees].ed = adjwgt[j]; - myedegrees[myrinfo->ndegrees++].ned = 1; - marker[ii] = 1; /* You do a complete .gv calculation */ - - /* All vertices adjacent to 'ii' need to be updated */ - for (jj=xadj[ii]; jjvrinfo+u; - oedegrees = orinfo->edegrees; - - for (kk=0; kkndegrees; kk++) { - if (oedegrees[kk].pid == to) { - oedegrees[kk].gv += 
vsize[ii]; - if (!marker[u]) { /* Need to update boundary etc */ - marker[u] = 2; - updind[nupd++] = u; - } - break; - } - } - } - } - } - - ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]); - } - - /*====================================================================== - * Add the contributions on the volume gain due to 'v' - *=====================================================================*/ - myrinfo = graph->vrinfo+v; - myedegrees = myrinfo->edegrees; - for (k=0; kndegrees; k++) - phtable[myedegrees[k].pid] = k; - phtable[to] = k; - - for (j=xadj[v]; jvrinfo+ii; - oedegrees = orinfo->edegrees; - - if (other == to) { - for (k=0; kndegrees; k++) { - if (phtable[oedegrees[k].pid] == -1) - oedegrees[k].gv -= vsize[v]; - } - } - else { - ASSERT(phtable[other] != -1); - - if (myedegrees[phtable[other]].ned > 1) { - for (k=0; kndegrees; k++) { - if (phtable[oedegrees[k].pid] == -1) - oedegrees[k].gv -= vsize[v]; - } - } - else { /* There is only one connection */ - for (k=0; kndegrees; k++) { - if (phtable[oedegrees[k].pid] != -1) - oedegrees[k].gv += vsize[v]; - } - } - } - } - for (k=0; kndegrees; k++) - phtable[myedegrees[k].pid] = -1; - phtable[to] = -1; - - - /*====================================================================== - * Recompute the volume information of the 'hard' nodes, and update the - * max volume gain for all the update vertices - *=====================================================================*/ - ComputeKWayVolume(graph, nupd, updind, marker, phtable); - - - /*====================================================================== - * Maintain a consistent boundary - *=====================================================================*/ - for (j=0; jvrinfo+k; - - if ((myrinfo->gv >= 0 || myrinfo->ed-myrinfo->id >= 0) && graph->bndptr[k] == -1) - BNDInsert(graph->nbnd, graph->bndind, graph->bndptr, k); - - if (myrinfo->gv < 0 && myrinfo->ed-myrinfo->id < 0 && graph->bndptr[k] != -1) - BNDDelete(graph->nbnd, graph->bndind, 
graph->bndptr, k); - } - -} - - - - -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void ComputeKWayVolume(GraphType *graph, int nupd, idxtype *updind, idxtype *marker, idxtype *phtable) -{ - int ii, iii, i, j, k, kk, l, nvtxs, me, other, pid; - idxtype *xadj, *vsize, *adjncy, *adjwgt, *where; - VRInfoType *rinfo, *myrinfo, *orinfo; - VEDegreeType *myedegrees, *oedegrees; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vsize = graph->vsize; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - rinfo = graph->vrinfo; - - - /*------------------------------------------------------------ - / Compute now the iv/ev degrees - /------------------------------------------------------------*/ - for (iii=0; iiiedegrees; - - if (marker[i] == 1) { /* Only complete gain updates go through */ - for (k=0; kndegrees; k++) - myedegrees[k].gv = 0; - - for (j=xadj[i]; jedegrees; - - for (kk=0; kkndegrees; kk++) - phtable[oedegrees[kk].pid] = kk; - phtable[other] = 1; - - if (me == other) { - /* Find which domains 'i' is connected and 'ii' is not and update their gain */ - for (k=0; kndegrees; k++) { - if (phtable[myedegrees[k].pid] == -1) - myedegrees[k].gv -= vsize[ii]; - } - } - else { - ASSERT(phtable[me] != -1); - - /* I'm the only connection of 'ii' in 'me' */ - if (oedegrees[phtable[me]].ned == 1) { - /* Increase the gains for all the common domains between 'i' and 'ii' */ - for (k=0; kndegrees; k++) { - if (phtable[myedegrees[k].pid] != -1) - myedegrees[k].gv += vsize[ii]; - } - } - else { - /* Find which domains 'i' is connected and 'ii' is not and update their gain */ - for (k=0; kndegrees; k++) { - if (phtable[myedegrees[k].pid] == -1) - myedegrees[k].gv -= vsize[ii]; - } - } - } - - for (kk=0; kkndegrees; kk++) - phtable[oedegrees[kk].pid] = -1; - phtable[other] = -1; - - } - } - - myrinfo->gv = 
-MAXIDX; - for (k=0; kndegrees; k++) { - if (myedegrees[k].gv > myrinfo->gv) - myrinfo->gv = myedegrees[k].gv; - } - if (myrinfo->ed > 0 && myrinfo->id == 0) - myrinfo->gv += vsize[i]; - - } - -} - - - -/************************************************************************* -* This function computes the total volume -**************************************************************************/ -int ComputeVolume(GraphType *graph, idxtype *where) -{ - int i, j, k, me, nvtxs, nparts, totalv; - idxtype *xadj, *adjncy, *vsize, *marker; - - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - vsize = (graph->vsize == NULL ? graph->vwgt : graph->vsize); - - nparts = where[idxamax(nvtxs, where)]+1; - marker = idxsmalloc(nparts, -1, "ComputeVolume: marker"); - - totalv = 0; - - for (i=0; invtxs; - xadj = graph->xadj; - vsize = graph->vsize; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - rinfo = graph->vrinfo; - - tmpdegrees = (VEDegreeType *)GKmalloc(nparts*sizeof(VEDegreeType), "CheckVolKWayPartitionParams: tmpdegrees"); - - /*------------------------------------------------------------ - / Compute now the iv/ev degrees - /------------------------------------------------------------*/ - for (i=0; iedegrees; - - for (k=0; kndegrees; k++) - tmpdegrees[k] = myedegrees[k]; - - tmprinfo.ndegrees = myrinfo->ndegrees; - tmprinfo.id = myrinfo->id; - tmprinfo.ed = myrinfo->ed; - - myrinfo = &tmprinfo; - myedegrees = tmpdegrees; - - - for (k=0; kndegrees; k++) - myedegrees[k].gv = 0; - - for (j=xadj[i]; jedegrees; - - if (me == other) { - /* Find which domains 'i' is connected and 'ii' is not and update their gain */ - for (k=0; kndegrees; k++) { - pid = myedegrees[k].pid; - for (kk=0; kkndegrees; kk++) { - if (oedegrees[kk].pid == pid) - break; - } - if (kk == orinfo->ndegrees) - myedegrees[k].gv -= vsize[ii]; - } - } - else { - /* Find the orinfo[me].ed and see if I'm the only connection */ - for (k=0; kndegrees; k++) { - if 
(oedegrees[k].pid == me) - break; - } - - if (oedegrees[k].ned == 1) { /* I'm the only connection of 'ii' in 'me' */ - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == other) { - myedegrees[k].gv += vsize[ii]; - break; - } - } - - /* Increase the gains for all the common domains between 'i' and 'ii' */ - for (k=0; kndegrees; k++) { - if ((pid = myedegrees[k].pid) == other) - continue; - for (kk=0; kkndegrees; kk++) { - if (oedegrees[kk].pid == pid) { - myedegrees[k].gv += vsize[ii]; - break; - } - } - } - - } - else { - /* Find which domains 'i' is connected and 'ii' is not and update their gain */ - for (k=0; kndegrees; k++) { - if ((pid = myedegrees[k].pid) == other) - continue; - for (kk=0; kkndegrees; kk++) { - if (oedegrees[kk].pid == pid) - break; - } - if (kk == orinfo->ndegrees) - myedegrees[k].gv -= vsize[ii]; - } - } - } - } - - myrinfo = rinfo+i; - myedegrees = myrinfo->edegrees; - - for (k=0; kndegrees; k++) { - pid = myedegrees[k].pid; - for (kk=0; kknvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - rinfo = graph->vrinfo; - - idxset(nparts*nparts, 0, pmat); - - for (i=0; i 0) { - me = where[i]; - ndegrees = rinfo[i].ndegrees; - edegrees = rinfo[i].edegrees; - - k = me*nparts; - for (j=0; j 0) - ndoms[i]++; - } - } -} - - - -/************************************************************************* -* This function computes the subdomain graph -**************************************************************************/ -void EliminateVolSubDomainEdges(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts) -{ - int i, ii, j, k, me, other, nvtxs, total, max, avg, totalout, nind, ncand, ncand2, target, target2, nadd; - int min, move, cpwgt, tvwgt; - idxtype *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *maxpwgt, *pmat, *ndoms, *mypmat, *otherpmat, *ind; - KeyValueType *cand, *cand2; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - vwgt = graph->vwgt; - adjwgt 
= graph->adjwgt; - - where = graph->where; - pwgts = idxset(nparts, 0, graph->pwgts); - - maxpwgt = idxwspacemalloc(ctrl, nparts); - ndoms = idxwspacemalloc(ctrl, nparts); - otherpmat = idxwspacemalloc(ctrl, nparts); - ind = idxwspacemalloc(ctrl, nvtxs); - pmat = idxset(nparts*nparts, 0, ctrl->wspace.pmat); - - cand = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand"); - cand2 = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand"); - - /* Compute the pmat matrix */ - for (i=0; i 0) - k++; - } - ndoms[i] = k; - } - - /* Get into the loop eliminating subdomain connections */ - for (;;) { - total = idxsum(nparts, ndoms); - avg = total/nparts; - max = ndoms[idxamax(nparts, ndoms)]; - - /* printf("Adjacent Subdomain Stats: Total: %3d, Max: %3d, Avg: %3d\n", total, max, avg); */ - - if (max < 1.5*avg) - break; - - me = idxamax(nparts, ndoms); - mypmat = pmat + me*nparts; - totalout = idxsum(nparts, mypmat); - - /*printf("Me: %d, TotalOut: %d,\n", me, totalout);*/ - - /* Sort the connections according to their cut */ - for (ncand2=0, i=0; i 0) { - cand2[ncand2].key = mypmat[i]; - cand2[ncand2++].val = i; - } - } - ikeysort(ncand2, cand2); - - move = 0; - for (min=0; min totalout/(2*ndoms[me])) - break; - - other = cand2[min].val; - - /*printf("\tMinOut: %d to %d\n", mypmat[other], other);*/ - - idxset(nparts, 0, otherpmat); - - /* Go and find the vertices in 'other' that are connected in 'me' */ - for (nind=0, i=0; i 0) { - cand[ncand].key = -otherpmat[i]; - cand[ncand++].val = i; - } - } - ikeysort(ncand, cand); - - /* - * Go through and the select the first domain that is common with 'me', and - * does not increase the ndoms[target] higher than my ndoms, subject to the - * maxpwgt constraint. Traversal is done from the mostly connected to the least. 
- */ - target = target2 = -1; - for (i=0; i 0) { - if (pwgts[k] + cpwgt > maxpwgt[k]) /* Check if balance will go off */ - continue; - - for (j=0; j 0 && ndoms[j] >= ndoms[me]-1 && pmat[nparts*j+k] == 0) - break; - } - if (j == nparts) { /* No bad second level effects */ - for (nadd=0, j=0; j 0 && pmat[nparts*k+j] == 0) - nadd++; - } - - /*printf("\t\tto=%d, nadd=%d, %d\n", k, nadd, ndoms[k]);*/ - if (target2 == -1 && ndoms[k]+nadd < ndoms[me]) { - target2 = k; - } - if (nadd == 0) { - target = k; - break; - } - } - } - } - if (target == -1 && target2 != -1) - target = target2; - - if (target == -1) { - /* printf("\t\tCould not make the move\n");*/ - continue; - } - - /*printf("\t\tMoving to %d\n", target);*/ - - /* Update the partition weights */ - INC_DEC(pwgts[target], pwgts[other], cpwgt); - - /* Set all nind vertices to belong to 'target' */ - for (ii=0; iiwhere and tries to push them around to -* remove some of them -**************************************************************************/ -void EliminateVolComponents(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor) -{ - int i, ii, j, jj, k, me, nvtxs, tvwgt, first, last, nleft, ncmps, cwgt, ncand, other, target, deltawgt; - idxtype *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts, *maxpwgt; - idxtype *cpvec, *touched, *perm, *todo, *cind, *cptr, *npcmps; - KeyValueType *cand; - int recompute=0; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - vwgt = graph->vwgt; - adjwgt = graph->adjwgt; - - where = graph->where; - pwgts = idxset(nparts, 0, graph->pwgts); - - touched = idxset(nvtxs, 0, idxwspacemalloc(ctrl, nvtxs)); - cptr = idxwspacemalloc(ctrl, nvtxs+1); - cind = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - todo = idxwspacemalloc(ctrl, nvtxs); - maxpwgt = idxwspacemalloc(ctrl, nparts); - cpvec = idxwspacemalloc(ctrl, nparts); - npcmps = idxset(nparts, 0, idxwspacemalloc(ctrl, nparts)); - - for (i=0; i 0) { - if (first == last) 
{ /* Find another starting vertex */ - cptr[++ncmps] = first; - ASSERT(touched[todo[0]] == 0); - i = todo[0]; - cind[last++] = i; - touched[i] = 1; - me = where[i]; - npcmps[me]++; - } - - i = cind[first++]; - k = perm[i]; - j = todo[k] = todo[--nleft]; - perm[j] = k; - - for (j=xadj[i]; j nparts) { /* There are more components than processors */ - cand = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand"); - - /* First determine the partition sizes and max allowed load imbalance */ - for (i=0; i .30*pwgts[me]) - continue; /* Skip the component if it is over 30% of the weight */ - - for (ncand=0, j=0; j 0) { - cand[ncand].key = -cpvec[j]; - cand[ncand++].val = j; - } - } - if (ncand == 0) - continue; - - ikeysort(ncand, cand); - - target = -1; - for (j=0; jmincut -= cpvec[target]; - recompute = 1; - } - } - GKfree((void **)&cand, LTERM); - } - - if (recompute) { - int ttlv; - idxtype *marker; - - marker = idxset(nparts, -1, cpvec); - for (ttlv=0, i=0; ivsize[i]; - marker[where[adjncy[j]]] = i; - } - } - } - graph->minvol = ttlv; - } - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs+1); - -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayvolrefine.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayvolrefine.c deleted file mode 100644 index 0e8c6f62..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/kwayvolrefine.c +++ /dev/null @@ -1,468 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * kwayvolrefine.c - * - * This file contains the driving routines for multilevel k-way refinement - * - * Started 7/28/97 - * George - * - * $Id: kwayvolrefine.c,v 1.1 2003/07/16 15:55:05 karypis Exp $ - */ - -#include - - 
-/************************************************************************* -* This function is the entry point of refinement -**************************************************************************/ -void RefineVolKWay(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, int nparts, - float *tpwgts, float ubfactor) -{ - int i, nlevels; - GraphType *ptr; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); - - /* Take care any non-contiguity */ - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->AuxTmr1)); - if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN) { - ComputeVolKWayPartitionParams(ctrl, graph, nparts); - EliminateVolComponents(ctrl, graph, nparts, tpwgts, 1.25); - EliminateVolSubDomainEdges(ctrl, graph, nparts, tpwgts); - EliminateVolComponents(ctrl, graph, nparts, tpwgts, 1.25); - } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->AuxTmr1)); - - - /* Determine how many levels are there */ - for (ptr=graph, nlevels=0; ptr!=orggraph; ptr=ptr->finer, nlevels++); - - /* Compute the parameters of the coarsest graph */ - ComputeVolKWayPartitionParams(ctrl, graph, nparts); - - for (i=0; ;i++) { - /*PrintSubDomainGraph(graph, nparts, graph->where);*/ - MALLOC_CHECK(NULL); - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RefTmr)); - - if (2*i >= nlevels && !IsBalanced(graph->pwgts, nparts, tpwgts, 1.04*ubfactor)) { - ComputeVolKWayBalanceBoundary(ctrl, graph, nparts); - switch (ctrl->RType) { - case RTYPE_KWAYRANDOM: - Greedy_KWayVolBalance(ctrl, graph, nparts, tpwgts, ubfactor, 1); - break; - case RTYPE_KWAYRANDOM_MCONN: - Greedy_KWayVolBalanceMConn(ctrl, graph, nparts, tpwgts, ubfactor, 1); - break; - } - ComputeVolKWayBoundary(ctrl, graph, nparts); - } - - switch (ctrl->RType) { - case RTYPE_KWAYRANDOM: - Random_KWayVolRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10, 1); - break; - case RTYPE_KWAYRANDOM_MCONN: - Random_KWayVolRefineMConn(ctrl, graph, nparts, tpwgts, ubfactor, 10, 1); - break; - } - IFSET(ctrl->dbglvl, DBG_TIME, 
stoptimer(ctrl->RefTmr)); - - if (graph == orggraph) - break; - - GKfree((void **)&graph->gdata, LTERM); /* Deallocate the graph related arrays */ - - graph = graph->finer; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); - ProjectVolKWayPartition(ctrl, graph, nparts); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr)); - } - - if (!IsBalanced(graph->pwgts, nparts, tpwgts, ubfactor)) { - ComputeVolKWayBalanceBoundary(ctrl, graph, nparts); - switch (ctrl->RType) { - case RTYPE_KWAYRANDOM: - Greedy_KWayVolBalance(ctrl, graph, nparts, tpwgts, ubfactor, 8); - Random_KWayVolRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); - break; - case RTYPE_KWAYRANDOM_MCONN: - Greedy_KWayVolBalanceMConn(ctrl, graph, nparts, tpwgts, ubfactor, 8); - Random_KWayVolRefineMConn(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); - break; - } - } - - EliminateVolComponents(ctrl, graph, nparts, tpwgts, ubfactor); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); -} - - - -/************************************************************************* -* This function allocates memory for k-way edge refinement -**************************************************************************/ -void AllocateVolKWayPartitionMemory(CtrlType *ctrl, GraphType *graph, int nparts) -{ - int nvtxs, pad64; - - nvtxs = graph->nvtxs; - - pad64 = (3*nvtxs+nparts)%2; - - graph->rdata = idxmalloc(3*nvtxs+nparts+(sizeof(VRInfoType)/sizeof(idxtype))*nvtxs+pad64, "AllocateVolKWayPartitionMemory: rdata"); - graph->pwgts = graph->rdata; - graph->where = graph->rdata + nparts; - graph->bndptr = graph->rdata + nvtxs + nparts; - graph->bndind = graph->rdata + 2*nvtxs + nparts; - graph->vrinfo = (VRInfoType *)(graph->rdata + 3*nvtxs+nparts + pad64); - -} - - - -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void 
ComputeVolKWayPartitionParams(CtrlType *ctrl, GraphType *graph, int nparts) -{ - int i, ii, j, k, kk, l, nvtxs, nbnd, mincut, minvol, me, other, pid; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *pwgts, *where; - VRInfoType *rinfo, *myrinfo, *orinfo; - VEDegreeType *myedegrees, *oedegrees; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - where = graph->where; - pwgts = idxset(nparts, 0, graph->pwgts); - rinfo = graph->vrinfo; - -starttimer(ctrl->AuxTmr1); - - /*------------------------------------------------------------ - / Compute now the id/ed degrees - /------------------------------------------------------------*/ - ctrl->wspace.cdegree = 0; - mincut = 0; - for (i=0; iid = myrinfo->ed = myrinfo->nid = myrinfo->ndegrees = 0; - myrinfo->edegrees = NULL; - - for (j=xadj[i]; jid += adjwgt[j]; - myrinfo->nid++; - } - } - myrinfo->ed = graph->adjwgtsum[i] - myrinfo->id; - - mincut += myrinfo->ed; - - /* Time to compute the particular external degrees */ - if (myrinfo->ed > 0) { - myedegrees = myrinfo->edegrees = ctrl->wspace.vedegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[i+1]-xadj[i]; - - for (j=xadj[i]; jndegrees; k++) { - if (myedegrees[k].pid == other) { - myedegrees[k].ed += adjwgt[j]; - myedegrees[k].ned++; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].gv = 0; - myedegrees[myrinfo->ndegrees].pid = other; - myedegrees[myrinfo->ndegrees].ed = adjwgt[j]; - myedegrees[myrinfo->ndegrees++].ned = 1; - } - } - } - - ASSERT(myrinfo->ndegrees <= xadj[i+1]-xadj[i]); - } - } - graph->mincut = mincut/2; - -stoptimer(ctrl->AuxTmr1); - - ComputeKWayVolGains(ctrl, graph, nparts); - -} - - - -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void ComputeKWayVolGains(CtrlType *ctrl, GraphType *graph, int 
nparts) -{ - int i, ii, j, k, kk, l, nvtxs, me, other, pid, myndegrees; - idxtype *xadj, *vsize, *adjncy, *adjwgt, *where, *bndind, *bndptr, *ophtable; - VRInfoType *rinfo, *myrinfo, *orinfo; - VEDegreeType *myedegrees, *oedegrees; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vsize = graph->vsize; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - where = graph->where; - bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); - rinfo = graph->vrinfo; - -starttimer(ctrl->AuxTmr2); - - ophtable = idxset(nparts, -1, idxwspacemalloc(ctrl, nparts)); - - /*------------------------------------------------------------ - / Compute now the iv/ev degrees - /------------------------------------------------------------*/ - graph->minvol = graph->nbnd = 0; - for (i=0; igv = -MAXIDX; - - if (myrinfo->ndegrees > 0) { - me = where[i]; - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - - graph->minvol += myndegrees*vsize[i]; - - for (j=xadj[i]; jedegrees; - - for (k=0; kndegrees; k++) - ophtable[oedegrees[k].pid] = k; - ophtable[other] = 1; /* this is to simplify coding */ - - if (me == other) { - /* Find which domains 'i' is connected and 'ii' is not and update their gain */ - for (k=0; kndegrees; kk++) - ophtable[oedegrees[kk].pid] = -1; - ophtable[other] = -1; - } - - /* Compute the max vgain */ - for (k=0; k myrinfo->gv) - myrinfo->gv = myedegrees[k].gv; - } - } - - if (myrinfo->ed > 0 && myrinfo->id == 0) - myrinfo->gv += vsize[i]; - - if (myrinfo->gv >= 0 || myrinfo->ed-myrinfo->id >= 0) - BNDInsert(graph->nbnd, bndind, bndptr, i); - } - -stoptimer(ctrl->AuxTmr2); - - idxwspacefree(ctrl, nparts); - -} - - - -/************************************************************************* -* This function projects a partition, and at the same time computes the -* parameters for refinement. 
-**************************************************************************/ -void ProjectVolKWayPartition(CtrlType *ctrl, GraphType *graph, int nparts) -{ - int i, j, k, nvtxs, me, other, istart, iend, ndegrees; - idxtype *xadj, *adjncy, *adjwgt, *adjwgtsum; - idxtype *cmap, *where; - idxtype *cwhere; - GraphType *cgraph; - VRInfoType *crinfo, *rinfo, *myrinfo; - VEDegreeType *myedegrees; - idxtype *htable; - - cgraph = graph->coarser; - cwhere = cgraph->where; - crinfo = cgraph->vrinfo; - - nvtxs = graph->nvtxs; - cmap = graph->cmap; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; - - AllocateVolKWayPartitionMemory(ctrl, graph, nparts); - where = graph->where; - rinfo = graph->vrinfo; - - /* Go through and project partition and compute id/ed for the nodes */ - for (i=0; iwspace.cdegree = 0; - for (i=0; iid = myrinfo->ed = myrinfo->nid = myrinfo->ndegrees = 0; - myrinfo->edegrees = NULL; - - myrinfo->id = adjwgtsum[i]; - myrinfo->nid = xadj[i+1]-xadj[i]; - - if (cmap[i] > 0) { /* If it is an interface node. 
Note cmap[i] = crinfo[cmap[i]].ed */ - istart = xadj[i]; - iend = xadj[i+1]; - - myedegrees = myrinfo->edegrees = ctrl->wspace.vedegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += iend-istart; - - ndegrees = 0; - for (j=istart; jed += adjwgt[j]; - myrinfo->nid--; - if ((k = htable[other]) == -1) { - htable[other] = ndegrees; - myedegrees[ndegrees].gv = 0; - myedegrees[ndegrees].pid = other; - myedegrees[ndegrees].ed = adjwgt[j]; - myedegrees[ndegrees++].ned = 1; - } - else { - myedegrees[k].ed += adjwgt[j]; - myedegrees[k].ned++; - } - } - } - myrinfo->id -= myrinfo->ed; - - /* Remove space for edegrees if it was interior */ - if (myrinfo->ed == 0) { - myrinfo->edegrees = NULL; - ctrl->wspace.cdegree -= iend-istart; - } - else { - myrinfo->ndegrees = ndegrees; - - for (j=0; jpwgts, graph->pwgts); - graph->mincut = cgraph->mincut; - - FreeGraph(graph->coarser); - graph->coarser = NULL; - - idxwspacefree(ctrl, nparts); - -} - - - -/************************************************************************* -* This function computes the boundary definition for balancing -**************************************************************************/ -void ComputeVolKWayBoundary(CtrlType *ctrl, GraphType *graph, int nparts) -{ - int i, nvtxs, nbnd; - idxtype *bndind, *bndptr; - - nvtxs = graph->nvtxs; - bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); - - - /*------------------------------------------------------------ - / Compute the new boundary - /------------------------------------------------------------*/ - nbnd = 0; - for (i=0; ivrinfo[i].gv >=0 || graph->vrinfo[i].ed-graph->vrinfo[i].id >= 0) - BNDInsert(nbnd, bndind, bndptr, i); - } - - graph->nbnd = nbnd; -} - -/************************************************************************* -* This function computes the boundary definition for balancing -**************************************************************************/ -void ComputeVolKWayBalanceBoundary(CtrlType *ctrl, GraphType 
*graph, int nparts) -{ - int i, nvtxs, nbnd; - idxtype *bndind, *bndptr; - - nvtxs = graph->nvtxs; - bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); - - - /*------------------------------------------------------------ - / Compute the new boundary - /------------------------------------------------------------*/ - nbnd = 0; - for (i=0; ivrinfo[i].ed > 0) - BNDInsert(nbnd, bndind, bndptr, i); - } - - graph->nbnd = nbnd; -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/macros.h b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/macros.h index 92d34e07..94ecf95b 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/macros.h +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/macros.h @@ -8,129 +8,251 @@ * Started 9/25/94 * George * - * $Id: macros.h,v 1.7 2003/07/21 19:11:40 karypis Exp $ + * $Id: macros.h 10060 2011-06-02 18:56:30Z karypis $ * */ +#ifndef _LIBMETIS_MACROS_H_ +#define _LIBMETIS_MACROS_H_ /************************************************************************* * The following macro returns a random number in the specified range **************************************************************************/ -#define RandomInRange(u) ((int)(1.0*(u)*rand()/(RAND_MAX+1.0))) - -#define amax(a, b) ((a) >= (b) ? (a) : (b)) -#define amin(a, b) ((a) >= (b) ? (b) : (a)) - #define AND(a, b) ((a) < 0 ? ((-(a))&(b)) : ((a)&(b))) #define OR(a, b) ((a) < 0 ? -((-(a))|(b)) : ((a)|(b))) #define XOR(a, b) ((a) < 0 ? -((-(a))^(b)) : ((a)^(b))) -#define SWAP(a, b, tmp) \ - do {(tmp) = (a); (a) = (b); (b) = (tmp);} while(0) +//#define icopy(n, a, b) (idx_t *)memcpy((void *)(b), (void *)(a), sizeof(idx_t)*(n)) -#define INC_DEC(a, b, val) \ - do {(a) += (val); (b) -= (val);} while(0) +#define HASHFCT(key, size) ((key)%(size)) +#define SWAP gk_SWAP +/* gets the appropriate option value */ +#define GETOPTION(options, idx, defval) \ + ((options) == NULL || (options)[idx] == -1 ? 
defval : (options)[idx]) -#define scopy(n, a, b) (float *)memcpy((void *)(b), (void *)(a), sizeof(float)*(n)) -#define idxcopy(n, a, b) (idxtype *)memcpy((void *)(b), (void *)(a), sizeof(idxtype)*(n)) +/* converts a user provided ufactor into a real ubfactor */ +#define I2RUBFACTOR(ufactor) (1.0+0.001*(ufactor)) + +/* set/reset the current workspace core */ +#define WCOREPUSH wspacepush(ctrl) +#define WCOREPOP wspacepop(ctrl) -#define HASHFCT(key, size) ((key)%(size)) /************************************************************************* -* Timer macros +* These macros insert and remove nodes from a Direct Access list **************************************************************************/ -#define cleartimer(tmr) (tmr = 0.0) -#define starttimer(tmr) (tmr -= seconds()) -#define stoptimer(tmr) (tmr += seconds()) -#define gettimer(tmr) (tmr) +#define ListInsert(n, lind, lptr, i) \ + do { \ + ASSERT(lptr[i] == -1); \ + lind[n] = i; \ + lptr[i] = (n)++;\ + } while(0) +#define ListDelete(n, lind, lptr, i) \ + do { \ + ASSERT(lptr[i] != -1); \ + lind[lptr[i]] = lind[--(n)]; \ + lptr[lind[n]] = lptr[i]; \ + lptr[i] = -1; \ + } while(0) -/************************************************************************* -* This macro is used to handle dbglvl -**************************************************************************/ -#define IFSET(a, flag, cmd) if ((a)&(flag)) (cmd); /************************************************************************* -* These macros are used for debuging memory leaks +* These macros insert and remove nodes from the boundary list **************************************************************************/ -#ifdef DMALLOC -#define imalloc(n, msg) (malloc(sizeof(int)*(n))) -#define fmalloc(n, msg) (malloc(sizeof(float)*(n))) -#define idxmalloc(n, msg) (malloc(sizeof(idxtype)*(n))) -#define ismalloc(n, val, msg) (iset((n), (val), malloc(sizeof(int)*(n)))) -#define idxsmalloc(n, val, msg) (idxset((n), (val), malloc(sizeof(idxtype)*(n)))) 
-#define GKmalloc(a, b) (malloc((a))) -#endif - -#ifdef DMALLOC -# define MALLOC_CHECK(ptr); -/* -# define MALLOC_CHECK(ptr) \ - if (malloc_verify((ptr)) == DMALLOC_VERIFY_ERROR) { \ - printf("***MALLOC_CHECK failed on line %d of file %s: " #ptr "\n", \ - __LINE__, __FILE__); \ - abort(); \ - } -*/ -#else -# define MALLOC_CHECK(ptr) ; -#endif +#define BNDInsert(nbnd, bndind, bndptr, vtx) \ + ListInsert(nbnd, bndind, bndptr, vtx) +#define BNDDelete(nbnd, bndind, bndptr, vtx) \ + ListDelete(nbnd, bndind, bndptr, vtx) /************************************************************************* -* This macro converts a length array in a CSR one +* These macros deal with id/ed updating during k-way refinement **************************************************************************/ -#define MAKECSR(i, n, a) \ +#define UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, \ + nbnd, bndptr, bndind, bndtype) \ do { \ - for (i=1; i0; i--) a[i] = a[i-1]; \ - a[0] = 0; \ + where[i] = to; \ + myrinfo->ed += myrinfo->id-mynbrs[k].ed; \ + SWAP(myrinfo->id, mynbrs[k].ed, j); \ + if (mynbrs[k].ed == 0) \ + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; \ + else \ + mynbrs[k].pid = from; \ + \ + /* Update the boundary information. Both deletion and addition is \ + allowed as this routine can be used for moving arbitrary nodes. 
*/ \ + if (bndtype == BNDTYPE_REFINE) { \ + if (bndptr[i] != -1 && myrinfo->ed - myrinfo->id < 0) \ + BNDDelete(nbnd, bndind, bndptr, i); \ + if (bndptr[i] == -1 && myrinfo->ed - myrinfo->id >= 0) \ + BNDInsert(nbnd, bndind, bndptr, i); \ + } \ + else { \ + if (bndptr[i] != -1 && myrinfo->ed <= 0) \ + BNDDelete(nbnd, bndind, bndptr, i); \ + if (bndptr[i] == -1 && myrinfo->ed > 0) \ + BNDInsert(nbnd, bndind, bndptr, i); \ + } \ } while(0) -/************************************************************************* -* These macros insert and remove nodes from the boundary list -**************************************************************************/ -#define BNDInsert(nbnd, bndind, bndptr, vtx) \ +#define UpdateAdjacentVertexInfoAndBND(ctrl, vid, adjlen, me, from, to, \ + myrinfo, ewgt, nbnd, bndptr, bndind, bndtype) \ do { \ - ASSERT(bndptr[vtx] == -1); \ - bndind[nbnd] = vtx; \ - bndptr[vtx] = nbnd++;\ + idx_t k; \ + cnbr_t *mynbrs; \ + \ + if (myrinfo->inbr == -1) { \ + myrinfo->inbr = cnbrpoolGetNext(ctrl, adjlen); \ + myrinfo->nnbrs = 0; \ + } \ + ASSERT(CheckRInfo(ctrl, myrinfo)); \ + \ + mynbrs = ctrl->cnbrpool + myrinfo->inbr; \ + \ + /* Update global ID/ED and boundary */ \ + if (me == from) { \ + INC_DEC(myrinfo->ed, myrinfo->id, (ewgt)); \ + if (bndtype == BNDTYPE_REFINE) { \ + if (myrinfo->ed-myrinfo->id >= 0 && bndptr[(vid)] == -1) \ + BNDInsert(nbnd, bndind, bndptr, (vid)); \ + } \ + else { \ + if (myrinfo->ed > 0 && bndptr[(vid)] == -1) \ + BNDInsert(nbnd, bndind, bndptr, (vid)); \ + } \ + } \ + else if (me == to) { \ + INC_DEC(myrinfo->id, myrinfo->ed, (ewgt)); \ + if (bndtype == BNDTYPE_REFINE) { \ + if (myrinfo->ed-myrinfo->id < 0 && bndptr[(vid)] != -1) \ + BNDDelete(nbnd, bndind, bndptr, (vid)); \ + } \ + else { \ + if (myrinfo->ed <= 0 && bndptr[(vid)] != -1) \ + BNDDelete(nbnd, bndind, bndptr, (vid)); \ + } \ + } \ + \ + /* Remove contribution from the .ed of 'from' */ \ + if (me != from) { \ + for (k=0; knnbrs; k++) { \ + if (mynbrs[k].pid == 
from) { \ + if (mynbrs[k].ed == (ewgt)) \ + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; \ + else \ + mynbrs[k].ed -= (ewgt); \ + break; \ + } \ + } \ + } \ + \ + /* Add contribution to the .ed of 'to' */ \ + if (me != to) { \ + for (k=0; knnbrs; k++) { \ + if (mynbrs[k].pid == to) { \ + mynbrs[k].ed += (ewgt); \ + break; \ + } \ + } \ + if (k == myrinfo->nnbrs) { \ + mynbrs[k].pid = to; \ + mynbrs[k].ed = (ewgt); \ + myrinfo->nnbrs++; \ + } \ + } \ + \ + ASSERT(CheckRInfo(ctrl, myrinfo));\ } while(0) -#define BNDDelete(nbnd, bndind, bndptr, vtx) \ + +#define UpdateQueueInfo(queue, vstatus, vid, me, from, to, myrinfo, oldnnbrs, \ + nupd, updptr, updind, bndtype) \ do { \ - ASSERT(bndptr[vtx] != -1); \ - bndind[bndptr[vtx]] = bndind[--nbnd]; \ - bndptr[bndind[nbnd]] = bndptr[vtx]; \ - bndptr[vtx] = -1; \ + real_t rgain; \ + \ + if (me == to || me == from || oldnnbrs != myrinfo->nnbrs) { \ + rgain = (myrinfo->nnbrs > 0 ? \ + 1.0*myrinfo->ed/sqrt(myrinfo->nnbrs) : 0.0) - myrinfo->id; \ + \ + if (bndtype == BNDTYPE_REFINE) { \ + if (vstatus[(vid)] == VPQSTATUS_PRESENT) { \ + if (myrinfo->ed-myrinfo->id >= 0) \ + rpqUpdate(queue, (vid), rgain); \ + else { \ + rpqDelete(queue, (vid)); \ + vstatus[(vid)] = VPQSTATUS_NOTPRESENT; \ + ListDelete(nupd, updind, updptr, (vid)); \ + } \ + } \ + else if (vstatus[(vid)] == VPQSTATUS_NOTPRESENT && myrinfo->ed-myrinfo->id >= 0) { \ + rpqInsert(queue, (vid), rgain); \ + vstatus[(vid)] = VPQSTATUS_PRESENT; \ + ListInsert(nupd, updind, updptr, (vid)); \ + } \ + } \ + else { \ + if (vstatus[(vid)] == VPQSTATUS_PRESENT) { \ + if (myrinfo->ed > 0) \ + rpqUpdate(queue, (vid), rgain); \ + else { \ + rpqDelete(queue, (vid)); \ + vstatus[(vid)] = VPQSTATUS_NOTPRESENT; \ + ListDelete(nupd, updind, updptr, (vid)); \ + } \ + } \ + else if (vstatus[(vid)] == VPQSTATUS_NOTPRESENT && myrinfo->ed > 0) { \ + rpqInsert(queue, (vid), rgain); \ + vstatus[(vid)] = VPQSTATUS_PRESENT; \ + ListInsert(nupd, updind, updptr, (vid)); \ + } \ + } \ + } \ } while(0) 
-/************************************************************************* -* These are debugging macros -**************************************************************************/ -#ifndef NDEBUG -# define ASSERT(expr) \ - if (!(expr)) { \ - printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ - __LINE__, __FILE__); \ - abort(); \ - } - -# define ASSERTP(expr, msg) \ - if (!(expr)) { \ - printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ - __LINE__, __FILE__); \ - printf msg ; \ - abort(); \ - } -#else -# define ASSERT(expr) ; -# define ASSERTP(expr, msg) ; -#endif +/*************************************************************************/ +/*! This macro determines the set of subdomains that a vertex can move to + without increasins the maxndoms. */ +/*************************************************************************/ +#define SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, vtmp) \ + do { \ + idx_t j, k, l, nadd, to; \ + for (j=0; jnnbrs; j++) { \ + safetos[to = mynbrs[j].pid] = 0; \ + \ + /* uncompress the connectivity info for the 'to' subdomain */ \ + for (k=0; knnbrs; k++) { \ + if (k == j) \ + continue; \ + \ + l = mynbrs[k].pid; \ + if (vtmp[l] == 0) { \ + if (nads[l] > maxndoms-1) { \ + nadd = maxndoms; \ + break; \ + } \ + nadd++; \ + } \ + } \ + if (nads[to]+nadd <= maxndoms) \ + safetos[to] = 1; \ + if (nadd == 0) \ + safetos[to] = 2; \ + \ + /* cleanup the connectivity info due to the 'to' subdomain */ \ + for (k=0; k - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void Match_RM(CtrlType *ctrl, GraphType *graph) -{ - int i, ii, j, nvtxs, cnvtxs, maxidx; - idxtype *xadj, *vwgt, *adjncy, *adjwgt; - idxtype *match, *cmap, *perm; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - - nvtxs = graph->nvtxs; - xadj 
= graph->xadj; - vwgt = graph->vwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - cmap = graph->cmap; - match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); - - perm = idxwspacemalloc(ctrl, nvtxs); - RandomPermute(nvtxs, perm, 1); - - cnvtxs = 0; - for (ii=0; iimaxvwgt) { - maxidx = adjncy[j]; - break; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); - - CreateCoarseGraph(ctrl, graph, cnvtxs, match, perm); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void Match_RM_NVW(CtrlType *ctrl, GraphType *graph) -{ - int i, ii, j, nvtxs, cnvtxs, maxidx; - idxtype *xadj, *adjncy; - idxtype *match, *cmap, *perm; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - - cmap = graph->cmap; - match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); - - perm = idxwspacemalloc(ctrl, nvtxs); - RandomPermute(nvtxs, perm, 1); - - cnvtxs = 0; - for (ii=0; iidbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); - - CreateCoarseGraph_NVW(ctrl, graph, cnvtxs, match, perm); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void Match_HEM(CtrlType *ctrl, GraphType *graph) -{ - int i, ii, j, k, nvtxs, cnvtxs, maxidx, maxwgt; - idxtype *xadj, *vwgt, *adjncy, *adjwgt; - idxtype *match, *cmap, *perm; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = 
graph->vwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - cmap = graph->cmap; - match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); - - perm = idxwspacemalloc(ctrl, nvtxs); - RandomPermute(nvtxs, perm, 1); - - cnvtxs = 0; - for (ii=0; iimaxvwgt) { - maxwgt = adjwgt[j]; - maxidx = adjncy[j]; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); - - CreateCoarseGraph(ctrl, graph, cnvtxs, match, perm); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void Match_SHEM(CtrlType *ctrl, GraphType *graph) -{ - int i, ii, j, k, nvtxs, cnvtxs, maxidx, maxwgt, avgdegree; - idxtype *xadj, *vwgt, *adjncy, *adjwgt; - idxtype *match, *cmap, *degrees, *perm, *tperm; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - cmap = graph->cmap; - match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); - - perm = idxwspacemalloc(ctrl, nvtxs); - tperm = idxwspacemalloc(ctrl, nvtxs); - degrees = idxwspacemalloc(ctrl, nvtxs); - - RandomPermute(nvtxs, tperm, 1); - avgdegree = 0.7*(xadj[nvtxs]/nvtxs); - for (i=0; i avgdegree ? avgdegree : xadj[i+1]-xadj[i]); - BucketSortKeysInc(nvtxs, avgdegree, degrees, tperm, perm); - - cnvtxs = 0; - - /* Take care any islands. 
Islands are matched with non-islands due to coarsening */ - for (ii=0; iiii; j--) { - k = perm[j]; - if (match[k] == UNMATCHED && xadj[k] < xadj[k+1]) { - maxidx = k; - break; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - /* Continue with normal matching */ - for (; iimaxvwgt) { - maxwgt = adjwgt[j]; - maxidx = adjncy[j]; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); - - idxwspacefree(ctrl, nvtxs); /* degrees */ - idxwspacefree(ctrl, nvtxs); /* tperm */ - - CreateCoarseGraph(ctrl, graph, cnvtxs, match, perm); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mbalance.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mbalance.c deleted file mode 100644 index 65e9961f..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mbalance.c +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mbalance.c - * - * This file contains code that is used to forcefully balance either - * bisections or k-sections - * - * Started 7/29/97 - * George - * - * $Id: mbalance.c,v 1.1 2003/07/16 15:55:07 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function is the entry point of the bisection balancing algorithms. 
-**************************************************************************/ -void MocBalance2Way(CtrlType *ctrl, GraphType *graph, float *tpwgts, float lbfactor) -{ - - if (Compute2WayHLoadImbalance(graph->ncon, graph->npwgts, tpwgts) < lbfactor) - return; - - MocGeneral2WayBalance(ctrl, graph, tpwgts, lbfactor); - -} - - -/************************************************************************* -* This function performs an edge-based FM refinement -**************************************************************************/ -void MocGeneral2WayBalance(CtrlType *ctrl, GraphType *graph, float *tpwgts, float lbfactor) -{ - int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum; - idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; - idxtype *moved, *swaps, *perm, *qnum; - float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; - PQueueType parts[MAXNCON][2]; - int higain, oldgain, mincut, newcut, mincutorder; - int qsizes[MAXNCON][2]; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - id = graph->id; - ed = graph->ed; - npwgts = graph->npwgts; - bndptr = graph->bndptr; - bndind = graph->bndind; - - moved = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - qnum = idxwspacemalloc(ctrl, nvtxs); - - limit = amin(amax(0.01*nvtxs, 15), 100); - - /* Initialize the queues */ - for (i=0; i qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) { - qsizes[qnum[i]][from]--; - qsizes[j][from]++; - qnum[i] = j; - } - } - } - } - } - -/* - printf("Weight Distribution (after):\t "); - for (i=0; imincut; - mincutorder = -1; - - if (ctrl->dbglvl&DBG_REFINE) { - printf("Parts: ["); - for (l=0; lnvtxs, graph->nbnd, graph->mincut, origbal); - } - - idxset(nvtxs, -1, moved); - - ASSERT(ComputeCut(graph, where) == graph->mincut); - 
ASSERT(CheckBnd(graph)); - - /* Insert all nodes in the priority queues */ - nbnd = graph->nbnd; - RandomPermute(nvtxs, perm, 1); - for (ii=0; ii limit) { /* We hit the limit, undo last move */ - newcut += (ed[higain]-id[higain]); - saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); - saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - break; - } - - where[higain] = to; - moved[higain] = nswaps; - swaps[nswaps] = higain; - - if (ctrl->dbglvl&DBG_MOVEINFO) { - printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); - for (l=0; l 0 && bndptr[higain] == -1) - BNDInsert(nbnd, bndind, bndptr, higain); - - for (j=xadj[higain]; j 0 && bndptr[k] == -1) - BNDInsert(nbnd, bndind, bndptr, k); - } - } - - - - /**************************************************************** - * Roll back computations - *****************************************************************/ - for (nswaps--; nswaps>mincutorder; nswaps--) { - higain = swaps[nswaps]; - - to = where[higain] = (where[higain]+1)%2; - SWAP(id[higain], ed[higain], tmp); - if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) - BNDDelete(nbnd, bndind, bndptr, higain); - else if (ed[higain] > 0 && bndptr[higain] == -1) - BNDInsert(nbnd, bndind, bndptr, higain); - - saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); - for (j=xadj[higain]; j 0) - BNDInsert(nbnd, bndind, bndptr, k); - } - } - - if (ctrl->dbglvl&DBG_REFINE) { - printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); - for (l=0; lmincut = mincut; - graph->nbnd = nbnd; - - - for (i=0; i - - -/************************************************************************* -* This function is the entry point of the bisection balancing algorithms. 
-**************************************************************************/ -void MocBalance2Way2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec) -{ - int i; - float tvec[MAXNCON]; - - Compute2WayHLoadImbalanceVec(graph->ncon, graph->npwgts, tpwgts, tvec); - if (!AreAllBelow(graph->ncon, tvec, ubvec)) - MocGeneral2WayBalance2(ctrl, graph, tpwgts, ubvec); -} - - - -/************************************************************************* -* This function performs an edge-based FM refinement -**************************************************************************/ -void MocGeneral2WayBalance2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec) -{ - int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum; - idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; - idxtype *moved, *swaps, *perm, *qnum; - float *nvwgt, *npwgts, origbal[MAXNCON], minbal[MAXNCON], newbal[MAXNCON]; - PQueueType parts[MAXNCON][2]; - int higain, oldgain, mincut, newcut, mincutorder; - float *maxwgt, *minwgt, tvec[MAXNCON]; - - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - id = graph->id; - ed = graph->ed; - npwgts = graph->npwgts; - bndptr = graph->bndptr; - bndind = graph->bndind; - - moved = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - qnum = idxwspacemalloc(ctrl, nvtxs); - - limit = amin(amax(0.01*nvtxs, 15), 100); - - /* Setup the weight intervals of the two subdomains */ - minwgt = fwspacemalloc(ctrl, 2*ncon); - maxwgt = fwspacemalloc(ctrl, 2*ncon); - - for (i=0; i<2; i++) { - for (j=0; jmincut; - mincutorder = -1; - - if (ctrl->dbglvl&DBG_REFINE) { - printf("Parts: ["); - for (l=0; lnvtxs, graph->nbnd, graph->mincut); - for (i=0; imincut); - ASSERT(CheckBnd(graph)); - - /* Insert all nodes in the priority queues */ - nbnd 
= graph->nbnd; - RandomPermute(nvtxs, perm, 1); - for (ii=0; ii limit) { /* We hit the limit, undo last move */ - newcut += (ed[higain]-id[higain]); - saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); - saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - break; - } - - where[higain] = to; - moved[higain] = nswaps; - swaps[nswaps] = higain; - - if (ctrl->dbglvl&DBG_MOVEINFO) { - printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); - for (i=0; i 0 && bndptr[higain] == -1) - BNDInsert(nbnd, bndind, bndptr, higain); - - for (j=xadj[higain]; j 0 && bndptr[k] == -1) - BNDInsert(nbnd, bndind, bndptr, k); - } - - } - - - - /**************************************************************** - * Roll back computations - *****************************************************************/ - for (i=0; imincutorder; nswaps--) { - higain = swaps[nswaps]; - - to = where[higain] = (where[higain]+1)%2; - SWAP(id[higain], ed[higain], tmp); - if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) - BNDDelete(nbnd, bndind, bndptr, higain); - else if (ed[higain] > 0 && bndptr[higain] == -1) - BNDInsert(nbnd, bndind, bndptr, higain); - - saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); - for (j=xadj[higain]; j 0) - BNDInsert(nbnd, bndind, bndptr, k); - } - } - - if (ctrl->dbglvl&DBG_REFINE) { - printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); - for (i=0; imincut = mincut; - graph->nbnd = nbnd; - - - for (i=0; i= maxdiff) { - maxdiff = diff; - *from = j; - *cnum = i; - } - } - } - -/* DELETE -j = *from; -for (i=0; i 0) { - maxdiff = (npwgts[(*from)*ncon+i] - maxwgt[(*from)*ncon+i]); - *cnum = i; - break; - } - } - - for (i++; i maxdiff && PQueueGetSize(&queues[i][*from]) > 0) { - maxdiff = diff; - *cnum = i; - } - } - } - - /* If the constraints ar OK, select a high gain vertex 
*/ - if (*from == -1) { - maxgain = -100000; - for (j=0; j<2; j++) { - for (i=0; i 0 && PQueueGetKey(&queues[i][j]) > maxgain) { - maxgain = PQueueGetKey(&queues[i][0]); - *from = j; - *cnum = i; - } - } - } - - /* printf("(%2d %2d) %3d\n", *from, *cnum, maxgain); */ - } -} diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mcoarsen.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mcoarsen.c deleted file mode 100644 index 336e6c62..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mcoarsen.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * mcoarsen.c - * - * This file contains the driving routines for the coarsening process - * - * Started 7/23/97 - * George - * - * $Id: mcoarsen.c,v 1.2 2003/07/31 16:23:29 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function takes a graph and creates a sequence of coarser graphs -**************************************************************************/ -GraphType *MCCoarsen2Way(CtrlType *ctrl, GraphType *graph) -{ - int i, clevel; - GraphType *cgraph; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->CoarsenTmr)); - - cgraph = graph; - - clevel = 0; - do { - if (ctrl->dbglvl&DBG_COARSEN) { - printf("%6d %7d %10d [%d] [%6.4f", cgraph->nvtxs, cgraph->nedges, - idxsum(cgraph->nvtxs, cgraph->adjwgtsum), ctrl->CoarsenTo, ctrl->nmaxvwgt); - for (i=0; incon; i++) - printf(" %5.3f", ssum_strd(cgraph->nvtxs, cgraph->nvwgt+i, cgraph->ncon)); - printf("]\n"); - } - - switch (ctrl->CType) { - case MATCH_RM: - MCMatch_RM(ctrl, cgraph); - break; - case MATCH_HEM: - if (clevel < 1 || cgraph->nedges == 0) - MCMatch_RM(ctrl, cgraph); - else - MCMatch_HEM(ctrl, cgraph); - break; - case MATCH_SHEM: - if (clevel < 1 || cgraph->nedges == 0) - MCMatch_RM(ctrl, cgraph); - else - MCMatch_SHEM(ctrl, cgraph); - break; - case MATCH_SHEMKWAY: - if (clevel < 1 || cgraph->nedges == 0) - MCMatch_RM(ctrl, cgraph); - else - 
MCMatch_SHEM(ctrl, cgraph); - break; - case MATCH_SHEBM_ONENORM: - if (clevel < 1 || cgraph->nedges == 0) - MCMatch_RM(ctrl, cgraph); - else - MCMatch_SHEBM(ctrl, cgraph, 1); - break; - case MATCH_SHEBM_INFNORM: - if (clevel < 1 || cgraph->nedges == 0) - MCMatch_RM(ctrl, cgraph); - else - MCMatch_SHEBM(ctrl, cgraph, -1); - break; - case MATCH_SBHEM_ONENORM: - if (clevel < 1 || cgraph->nedges == 0) - MCMatch_RM(ctrl, cgraph); - else - MCMatch_SBHEM(ctrl, cgraph, 1); - break; - case MATCH_SBHEM_INFNORM: - if (clevel < 1 || cgraph->nedges == 0) - MCMatch_RM(ctrl, cgraph); - else - MCMatch_SBHEM(ctrl, cgraph, -1); - break; - default: - errexit("Unknown CType: %d\n", ctrl->CType); - } - - cgraph = cgraph->coarser; - clevel++; - - } while (cgraph->nvtxs > ctrl->CoarsenTo && cgraph->nvtxs < COARSEN_FRACTION2*cgraph->finer->nvtxs && cgraph->nedges > cgraph->nvtxs/2); - - if (ctrl->dbglvl&DBG_COARSEN) { - printf("%6d %7d %10d [%d] [%6.4f", cgraph->nvtxs, cgraph->nedges, - idxsum(cgraph->nvtxs, cgraph->adjwgtsum), ctrl->CoarsenTo, ctrl->nmaxvwgt); - for (i=0; incon; i++) - printf(" %5.3f", ssum_strd(cgraph->nvtxs, cgraph->nvwgt+i, cgraph->ncon)); - printf("]\n"); - } - - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->CoarsenTmr)); - - return cgraph; -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mcutil.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mcutil.c new file mode 100644 index 00000000..6e20f556 --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mcutil.c @@ -0,0 +1,330 @@ +/* + * mutil.c + * + * This file contains various utility functions for the MOC portion of the + * code + * + * Started 2/15/98 + * George + * + * $Id: mcutil.c 13901 2013-03-24 16:17:03Z karypis $ + * + */ + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function compares two vectors x & y and returns true + if \forall i, x[i] <= y[i]. 
+*/ +/**************************************************************************/ +int rvecle(idx_t n, real_t *x, real_t *y) +{ + for (n--; n>=0; n--) { + if (x[n] > y[n]) + return 0; + } + + return 1; +} + + +/*************************************************************************/ +/*! This function compares two vectors x & y and returns true + if \forall i, x[i] >= y[i]. +*/ +/**************************************************************************/ +int rvecge(idx_t n, real_t *x, real_t *y) +{ + for (n--; n>=0; n--) { + if (x[n] < y[n]) + return 0; + } + + return 1; +} + + +/*************************************************************************/ +/*! This function compares vectors x1+x2 against y and returns true + if \forall i, x1[i]+x2[i] <= y[i]. +*/ +/**************************************************************************/ +int rvecsumle(idx_t n, real_t *x1, real_t *x2, real_t *y) +{ + for (n--; n>=0; n--) { + if (x1[n]+x2[n] > y[n]) + return 0; + } + + return 1; +} + + +/*************************************************************************/ +/*! This function returns max_i(x[i]-y[i]) */ +/**************************************************************************/ +real_t rvecmaxdiff(idx_t n, real_t *x, real_t *y) +{ + real_t max; + + max = x[0]-y[0]; + + for (n--; n>0; n--) { + if (max < x[n]-y[n]) + max = x[n]-y[n]; + } + + return max; +} + + +/*************************************************************************/ +/*! This function returns true if \forall i, x[i] <= z[i]. */ +/**************************************************************************/ +int ivecle(idx_t n, idx_t *x, idx_t *z) +{ + for (n--; n>=0; n--) { + if (x[n] > z[n]) + return 0; + } + + return 1; +} + + +/*************************************************************************/ +/*! This function returns true if \forall i, x[i] >= z[i]. 
*/ +/**************************************************************************/ +int ivecge(idx_t n, idx_t *x, idx_t *z) +{ + for (n--; n>=0; n--) { + if (x[n] < z[n]) + return 0; + } + + return 1; +} + + +/*************************************************************************/ +/*! This function returns true if \forall i, a*x[i]+y[i] <= z[i]. */ +/**************************************************************************/ +int ivecaxpylez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z) +{ + for (n--; n>=0; n--) { + if (a*x[n]+y[n] > z[n]) + return 0; + } + + return 1; +} + + +/*************************************************************************/ +/*! This function returns true if \forall i, a*x[i]+y[i] >= z[i]. */ +/**************************************************************************/ +int ivecaxpygez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z) +{ + for (n--; n>=0; n--) { + if (a*x[n]+y[n] < z[n]) + return 0; + } + + return 1; +} + + +/*************************************************************************/ +/*! This function checks if v+u2 provides a better balance in the weight + vector that v+u1 */ +/*************************************************************************/ +int BetterVBalance(idx_t ncon, real_t *invtvwgt, idx_t *v_vwgt, idx_t *u1_vwgt, + idx_t *u2_vwgt) +{ + idx_t i; + real_t sum1=0.0, sum2=0.0, diff1=0.0, diff2=0.0; + + for (i=0; i= 0); +} + + +/*************************************************************************/ +/*! This function takes two ubfactor-centered load imbalance vectors x & y, + and returns true if y is better balanced than x. */ +/*************************************************************************/ +int BetterBalance2Way(idx_t n, real_t *x, real_t *y) +{ + real_t nrm1=0.0, nrm2=0.0; + + for (--n; n>=0; n--) { + if (x[n] > 0) nrm1 += x[n]*x[n]; + if (y[n] > 0) nrm2 += y[n]*y[n]; + } + return nrm2 < nrm1; +} + + +/*************************************************************************/ +/*! 
Given a vertex and two weights, this function returns 1, if the second + partition will be more balanced than the first after the weighted + additional of that vertex. + The balance determination takes into account the ideal target weights + of the two partitions. +*/ +/*************************************************************************/ +int BetterBalanceKWay(idx_t ncon, idx_t *vwgt, real_t *ubvec, + idx_t a1, idx_t *pt1, real_t *bm1, + idx_t a2, idx_t *pt2, real_t *bm2) +{ + idx_t i; + real_t tmp, nrm1=0.0, nrm2=0.0, max1=0.0, max2=0.0; + + for (i=0; i max1 ? tmp : max1); + + tmp = bm2[i]*(pt2[i]+a2*vwgt[i]) - ubvec[i]; + //printf("%+.4f ", (float)tmp); + nrm2 += tmp*tmp; + max2 = (tmp > max2 ? tmp : max2); + + //printf("%4d %4d %4d %4d %4d %4d %4d %.2f\n", + // (int)vwgt[i], + // (int)a1, (int)pt1[i], (int)tpt1[i], + // (int)a2, (int)pt2[i], (int)tpt2[i], ubvec[i]); + } + //printf(" %.3f %.3f %.3f %.3f\n", (float)max1, (float)nrm1, (float)max2, (float)nrm2); + + if (max2 < max1) + return 1; + + if (max2 == max1 && nrm2 < nrm1) + return 1; + + return 0; +} + + +/*************************************************************************/ +/*! Computes the maximum load imbalance of a partitioning solution over + all the constraints. */ +/**************************************************************************/ +real_t ComputeLoadImbalance(graph_t *graph, idx_t nparts, real_t *pijbm) +{ + idx_t i, j, ncon, *pwgts; + real_t max, cur; + + ncon = graph->ncon; + pwgts = graph->pwgts; + + max = 1.0; + for (i=0; i max) + max = cur; + } + } + + return max; +} + + +/*************************************************************************/ +/*! Computes the maximum load imbalance difference of a partitioning + solution over all the constraints. + The difference is defined with respect to the allowed maximum + unbalance for the respective constraint. 
+ */ +/**************************************************************************/ +real_t ComputeLoadImbalanceDiff(graph_t *graph, idx_t nparts, real_t *pijbm, + real_t *ubvec) +{ + idx_t i, j, ncon, *pwgts; + real_t max, cur; + + ncon = graph->ncon; + pwgts = graph->pwgts; + + max = -1.0; + for (i=0; i max) + max = cur; + } + } + + return max; +} + + +/*************************************************************************/ +/*! Computes the difference between load imbalance of each constraint across + the partitions minus the desired upper bound on the load imabalnce. + It also returns the maximum load imbalance across the partitions & + constraints. */ +/**************************************************************************/ +real_t ComputeLoadImbalanceDiffVec(graph_t *graph, idx_t nparts, real_t *pijbm, + real_t *ubfactors, real_t *diffvec) +{ + idx_t i, j, ncon, *pwgts; + real_t cur, max; + + ncon = graph->ncon; + pwgts = graph->pwgts; + + for (max=-1.0, i=0; i diffvec[i]) + diffvec[i] = cur; + } + if (max < diffvec[i]) + max = diffvec[i]; + } + + return max; +} + + +/*************************************************************************/ +/*! Computes the load imbalance of each constraint across the partitions. 
*/ +/**************************************************************************/ +void ComputeLoadImbalanceVec(graph_t *graph, idx_t nparts, real_t *pijbm, + real_t *lbvec) +{ + idx_t i, j, ncon, *pwgts; + real_t cur; + + ncon = graph->ncon; + pwgts = graph->pwgts; + + for (i=0; i lbvec[i]) + lbvec[i] = cur; + } + } +} + + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/memory.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/memory.c deleted file mode 100644 index 507ac183..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/memory.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * memory.c - * - * This file contains routines that deal with memory allocation - * - * Started 2/24/96 - * George - * - * $Id: memory.c,v 1.1 2003/07/24 18:39:08 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function allocates memory for the workspace -**************************************************************************/ -void AllocateWorkSpace(CtrlType *ctrl, GraphType *graph, int nparts) -{ - ctrl->wspace.pmat = NULL; - - if (ctrl->optype == OP_KMETIS) { - ctrl->wspace.edegrees = (EDegreeType *)GKmalloc(graph->nedges*sizeof(EDegreeType), "AllocateWorkSpace: edegrees"); - ctrl->wspace.vedegrees = NULL; - ctrl->wspace.auxcore = (idxtype *)ctrl->wspace.edegrees; - - ctrl->wspace.pmat = idxmalloc(nparts*nparts, "AllocateWorkSpace: pmat"); - - /* Memory requirements for different phases - Coarsening - Matching: 4*nvtxs vectors - Contraction: 2*nvtxs vectors (from the above 4), 1*nparts, 1*Nedges - Total = MAX(4*nvtxs, 2*nvtxs+nparts+nedges) - - Refinement - Random Refinement/Balance: 5*nparts + 1*nvtxs + 2*nedges - Greedy Refinement/Balance: 5*nparts + 2*nvtxs + 2*nedges + 1*PQueue(==Nvtxs) - Total = 5*nparts + 3*nvtxs + 2*nedges - - Total = 5*nparts + 3*nvtxs + 2*nedges - */ - 
ctrl->wspace.maxcore = 3*(graph->nvtxs+1) + /* Match/Refinement vectors */ - 5*(nparts+1) + /* Partition weights etc */ - graph->nvtxs*(sizeof(ListNodeType)/sizeof(idxtype)) + /* Greedy k-way balance/refine */ - 20 /* padding for 64 bit machines */ - ; - } - else if (ctrl->optype == OP_KVMETIS) { - ctrl->wspace.edegrees = NULL; - ctrl->wspace.vedegrees = (VEDegreeType *)GKmalloc(graph->nedges*sizeof(VEDegreeType), "AllocateWorkSpace: vedegrees"); - ctrl->wspace.auxcore = (idxtype *)ctrl->wspace.vedegrees; - - ctrl->wspace.pmat = idxmalloc(nparts*nparts, "AllocateWorkSpace: pmat"); - - /* Memory requirements for different phases are identical to KMETIS */ - ctrl->wspace.maxcore = 3*(graph->nvtxs+1) + /* Match/Refinement vectors */ - 3*(nparts+1) + /* Partition weights etc */ - graph->nvtxs*(sizeof(ListNodeType)/sizeof(idxtype)) + /* Greedy k-way balance/refine */ - 20 /* padding for 64 bit machines */ - ; - } - else { - ctrl->wspace.edegrees = (EDegreeType *)idxmalloc(graph->nedges, "AllocateWorkSpace: edegrees"); - ctrl->wspace.vedegrees = NULL; - ctrl->wspace.auxcore = (idxtype *)ctrl->wspace.edegrees; - - ctrl->wspace.maxcore = 5*(graph->nvtxs+1) + /* Refinement vectors */ - 4*(nparts+1) + /* Partition weights etc */ - 2*graph->ncon*graph->nvtxs*(sizeof(ListNodeType)/sizeof(idxtype)) + /* 2-way refinement */ - 2*graph->ncon*(NEG_GAINSPAN+PLUS_GAINSPAN+1)*(sizeof(ListNodeType *)/sizeof(idxtype)) + /* 2-way refinement */ - 20 /* padding for 64 bit machines */ - ; - } - - ctrl->wspace.maxcore += HTLENGTH; - ctrl->wspace.core = idxmalloc(ctrl->wspace.maxcore, "AllocateWorkSpace: maxcore"); - ctrl->wspace.ccore = 0; -} - - -/************************************************************************* -* This function allocates memory for the workspace -**************************************************************************/ -void FreeWorkSpace(CtrlType *ctrl, GraphType *graph) -{ - GKfree((void **)(&ctrl->wspace.edegrees), &ctrl->wspace.vedegrees, &ctrl->wspace.core, 
&ctrl->wspace.pmat, LTERM); -} - -/************************************************************************* -* This function returns how may words are left in the workspace -**************************************************************************/ -int WspaceAvail(CtrlType *ctrl) -{ - return ctrl->wspace.maxcore - ctrl->wspace.ccore; -} - - -/************************************************************************* -* This function allocate space from the core -**************************************************************************/ -idxtype *idxwspacemalloc(CtrlType *ctrl, int n) -{ - n += n%2; /* This is a fix for 64 bit machines that require 8-byte pointer allignment */ - - ctrl->wspace.ccore += n; - ASSERT(ctrl->wspace.ccore <= ctrl->wspace.maxcore); - return ctrl->wspace.core + ctrl->wspace.ccore - n; -} - -/************************************************************************* -* This function frees space from the core -**************************************************************************/ -void idxwspacefree(CtrlType *ctrl, int n) -{ - n += n%2; /* This is a fix for 64 bit machines that require 8-byte pointer allignment */ - - ctrl->wspace.ccore -= n; - ASSERT(ctrl->wspace.ccore >= 0); -} - - -/************************************************************************* -* This function allocate space from the core -**************************************************************************/ -float *fwspacemalloc(CtrlType *ctrl, int n) -{ - n += n%2; /* This is a fix for 64 bit machines that require 8-byte pointer allignment */ - - ctrl->wspace.ccore += n; - ASSERT(ctrl->wspace.ccore <= ctrl->wspace.maxcore); - return (float *) (ctrl->wspace.core + ctrl->wspace.ccore - n); -} - -/************************************************************************* -* This function frees space from the core -**************************************************************************/ -void fwspacefree(CtrlType *ctrl, int n) -{ - n += n%2; /* This is a fix for 64 
bit machines that require 8-byte pointer allignment */ - - ctrl->wspace.ccore -= n; - ASSERT(ctrl->wspace.ccore >= 0); -} - - - -/************************************************************************* -* This function creates a CoarseGraphType data structure and initializes -* the various fields -**************************************************************************/ -GraphType *CreateGraph(void) -{ - GraphType *graph; - - graph = (GraphType *)GKmalloc(sizeof(GraphType), "CreateCoarseGraph: graph"); - - InitGraph(graph); - - return graph; -} - - -/************************************************************************* -* This function creates a CoarseGraphType data structure and initializes -* the various fields -**************************************************************************/ -void InitGraph(GraphType *graph) -{ - graph->gdata = graph->rdata = NULL; - - graph->nvtxs = graph->nedges = -1; - graph->mincut = graph->minvol = -1; - - graph->xadj = graph->vwgt = graph->adjncy = graph->adjwgt = NULL; - graph->adjwgtsum = NULL; - graph->label = NULL; - graph->cmap = NULL; - - graph->where = graph->pwgts = NULL; - graph->id = graph->ed = NULL; - graph->bndptr = graph->bndind = NULL; - graph->rinfo = NULL; - graph->vrinfo = NULL; - graph->nrinfo = NULL; - - graph->ncon = -1; - graph->nvwgt = NULL; - graph->npwgts = NULL; - - graph->vsize = NULL; - - graph->coarser = graph->finer = NULL; - -} - -/************************************************************************* -* This function deallocates any memory stored in a graph -**************************************************************************/ -void FreeGraph(GraphType *graph) -{ - - GKfree((void **)&graph->gdata, (void **)&graph->nvwgt, (void **)&graph->rdata, - (void **)&graph->npwgts, (void **)&graph->label, LTERM); - GKfree((void **)&graph, LTERM); -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mesh.c 
b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mesh.c index 6c66a307..3c526121 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mesh.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mesh.c @@ -9,371 +9,404 @@ * Started 8/18/97 * George * - * $Id: mesh.c,v 1.2 2003/07/22 20:29:03 karypis Exp $ + * $Id: mesh.c 13804 2013-03-04 23:49:08Z karypis $ * */ -#include - -/***************************************************************************** -* This function creates a graph corresponding to the dual of a finite element -* mesh. At this point the supported elements are triangles, tetrahedrons, and -* bricks. -******************************************************************************/ -void METIS_MeshToDual(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, - idxtype *dxadj, idxtype *dadjncy) +#include "metislib.h" + + +/*****************************************************************************/ +/*! This function creates a graph corresponding to the dual of a finite element + mesh. + + \param ne is the number of elements in the mesh. + \param nn is the number of nodes in the mesh. + \param eptr is an array of size ne+1 used to mark the start and end + locations in the nind array. + \param eind is an array that stores for each element the set of node IDs + (indices) that it is made off. The length of this array is equal + to the total number of nodes over all the mesh elements. + \param ncommon is the minimum number of nodes that two elements must share + in order to be connected via an edge in the dual graph. + \param numflag is either 0 or 1 indicating if the numbering of the nodes + starts from 0 or 1, respectively. The same numbering is used for the + returned graph as well. + \param r_xadj indicates where the adjacency list of each vertex is stored + in r_adjncy. The memory for this array is allocated by this routine. + It can be freed by calling METIS_free(). 
+ \param r_adjncy stores the adjacency list of each vertex in the generated + dual graph. The memory for this array is allocated by this routine. + It can be freed by calling METIS_free(). + +*/ +/*****************************************************************************/ +int METIS_MeshToDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *ncommon, idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy) { - int esizes[] = {-1, 3, 4, 8, 4}; + int sigrval=0, renumber=0; + + /* set up malloc cleaning code and signal catchers */ + if (!gk_malloc_init()) + return METIS_ERROR_MEMORY; + + gk_sigtrap(); + + if ((sigrval = gk_sigcatch()) != 0) + goto SIGTHROW; + + + /* renumber the mesh */ + if (*numflag == 1) { + ChangeMesh2CNumbering(*ne, eptr, eind); + renumber = 1; + } + + /* create dual graph */ + *r_xadj = *r_adjncy = NULL; + CreateGraphDual(*ne, *nn, eptr, eind, *ncommon, r_xadj, r_adjncy); + - if (*numflag == 1) - ChangeMesh2CNumbering((*ne)*esizes[*etype], elmnts); +SIGTHROW: + if (renumber) + ChangeMesh2FNumbering(*ne, eptr, eind, *ne, *r_xadj, *r_adjncy); - GENDUALMETIS(*ne, *nn, *etype, elmnts, dxadj, dadjncy); + gk_siguntrap(); + gk_malloc_cleanup(0); - if (*numflag == 1) - ChangeMesh2FNumbering((*ne)*esizes[*etype], elmnts, *ne, dxadj, dadjncy); + if (sigrval != 0) { + if (*r_xadj != NULL) + free(*r_xadj); + if (*r_adjncy != NULL) + free(*r_adjncy); + *r_xadj = *r_adjncy = NULL; + } + + return metis_rcode(sigrval); } -/***************************************************************************** -* This function creates a graph corresponding to the finite element mesh. -* At this point the supported elements are triangles, tetrahedrons. -******************************************************************************/ -void METIS_MeshToNodal(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, - idxtype *dxadj, idxtype *dadjncy) +/*****************************************************************************/ +/*! 
This function creates a graph corresponding to (almost) the nodal of a + finite element mesh. In the nodal graph, each node is connected to the + nodes corresponding to the union of nodes present in all the elements + in which that node belongs. + + \param ne is the number of elements in the mesh. + \param nn is the number of nodes in the mesh. + \param eptr is an array of size ne+1 used to mark the start and end + locations in the nind array. + \param eind is an array that stores for each element the set of node IDs + (indices) that it is made off. The length of this array is equal + to the total number of nodes over all the mesh elements. + \param numflag is either 0 or 1 indicating if the numbering of the nodes + starts from 0 or 1, respectively. The same numbering is used for the + returned graph as well. + \param r_xadj indicates where the adjacency list of each vertex is stored + in r_adjncy. The memory for this array is allocated by this routine. + It can be freed by calling METIS_free(). + \param r_adjncy stores the adjacency list of each vertex in the generated + dual graph. The memory for this array is allocated by this routine. + It can be freed by calling METIS_free(). 
+ +*/ +/*****************************************************************************/ +int METIS_MeshToNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy) { - int esizes[] = {-1, 3, 4, 8, 4}; - - if (*numflag == 1) - ChangeMesh2CNumbering((*ne)*esizes[*etype], elmnts); - - switch (*etype) { - case 1: - TRINODALMETIS(*ne, *nn, elmnts, dxadj, dadjncy); - break; - case 2: - TETNODALMETIS(*ne, *nn, elmnts, dxadj, dadjncy); - break; - case 3: - HEXNODALMETIS(*ne, *nn, elmnts, dxadj, dadjncy); - break; - case 4: - QUADNODALMETIS(*ne, *nn, elmnts, dxadj, dadjncy); - break; + int sigrval=0, renumber=0; + + /* set up malloc cleaning code and signal catchers */ + if (!gk_malloc_init()) + return METIS_ERROR_MEMORY; + + gk_sigtrap(); + + if ((sigrval = gk_sigcatch()) != 0) + goto SIGTHROW; + + + /* renumber the mesh */ + if (*numflag == 1) { + ChangeMesh2CNumbering(*ne, eptr, eind); + renumber = 1; } - if (*numflag == 1) - ChangeMesh2FNumbering((*ne)*esizes[*etype], elmnts, *nn, dxadj, dadjncy); + /* create nodal graph */ + *r_xadj = *r_adjncy = NULL; + CreateGraphNodal(*ne, *nn, eptr, eind, r_xadj, r_adjncy); + + +SIGTHROW: + if (renumber) + ChangeMesh2FNumbering(*ne, eptr, eind, *nn, *r_xadj, *r_adjncy); + + gk_siguntrap(); + gk_malloc_cleanup(0); + + if (sigrval != 0) { + if (*r_xadj != NULL) + free(*r_xadj); + if (*r_adjncy != NULL) + free(*r_adjncy); + *r_xadj = *r_adjncy = NULL; + } + + return metis_rcode(sigrval); } +/*****************************************************************************/ +/*! 
This function creates the dual of a finite element mesh */ +/*****************************************************************************/ +void CreateGraphDual(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t ncommon, + idx_t **r_xadj, idx_t **r_adjncy) +{ + idx_t i, j, nnbrs; + idx_t *nptr, *nind; + idx_t *xadj, *adjncy; + idx_t *marker, *nbrs; + + if (ncommon < 1) { + printf(" Increased ncommon to 1, as it was initially %"PRIDX"\n", ncommon); + ncommon = 1; + } + + /* construct the node-element list first */ + nptr = ismalloc(nn+1, 0, "CreateGraphDual: nptr"); + nind = imalloc(eptr[ne], "CreateGraphDual: nind"); + + for (i=0; i0; i--) - nptr[i] = nptr[i-1]; - nptr[0] = 0; - - for (i=0; i=nptr[n]; k--) { - if ((kk = nind[k]) <= i) - break; - - kkk = kk&mask; - if ((l = mark[kkk]) == -1) { - ind[m] = kk; - wgt[m] = 1; - mark[kkk] = m++; - } - else if (ind[l] == kk) { - wgt[l]++; - } - else { - for (jj=0; jj0; i--) - dxadj[i] = dxadj[i-1]; - dxadj[0] = 0; - GKfree((void **)&mark,(void **)&nptr,(void **)&nind, LTERM); + idx_t i, ii, j, jj, k, l, overlap; + + /* find all elements that share at least one node with qid */ + for (k=0, i=0; i= ncommon || + overlap >= elen-1 || + overlap >= eptr[l+1]-eptr[l]-1) + nbrs[j++] = l; + marker[l] = 0; + } + + return j; } +/*****************************************************************************/ +/*! 
This function creates the (almost) nodal of a finite element mesh */ +/*****************************************************************************/ +void CreateGraphNodal(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, + idx_t **r_xadj, idx_t **r_adjncy) +{ + idx_t i, j, nnbrs; + idx_t *nptr, *nind; + idx_t *xadj, *adjncy; + idx_t *marker, *nbrs; + + + /* construct the node-element list first */ + nptr = ismalloc(nn+1, 0, "CreateGraphNodal: nptr"); + nind = imalloc(eptr[ne], "CreateGraphNodal: nind"); + + for (i=0; i0; i--) - nptr[i] = nptr[i-1]; - nptr[0] = 0; - - - mark = idxsmalloc(nvtxs, -1, "TRINODALMETIS: mark"); - - nedges = dxadj[0] = 0; - for (i=0; i0; i--) - nptr[i] = nptr[i-1]; - nptr[0] = 0; - - - mark = idxsmalloc(nvtxs, -1, "TETNODALMETIS: mark"); - - nedges = dxadj[0] = 0; - for (i=0; i0; i--) - nptr[i] = nptr[i-1]; - nptr[0] = 0; - - - mark = idxsmalloc(nvtxs, -1, "HEXNODALMETIS: mark"); - - nedges = dxadj[0] = 0; - for (i=0; i0; i--) - nptr[i] = nptr[i-1]; - nptr[0] = 0; - - - mark = idxsmalloc(nvtxs, -1, "QUADNODALMETIS: mark"); - - nedges = dxadj[0] = 0; - for (i=0; ieptr, &mesh->eind, &mesh->ewgt, &mesh, LTERM); + + *r_mesh = NULL; } + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/meshpart.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/meshpart.c index d26e2523..77fd35c5 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/meshpart.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/meshpart.c @@ -8,197 +8,255 @@ * Started 9/29/97 * George * - * $Id: meshpart.c,v 1.1 2003/07/16 15:55:08 karypis Exp $ + * $Id: meshpart.c 17513 2014-08-05 16:20:50Z dominique $ * */ -#include +#include "metislib.h" /************************************************************************* * This function partitions a finite element mesh by partitioning its nodal * graph using KMETIS and then assigning elements in a load balanced fashion. 
**************************************************************************/ -void METIS_PartMeshNodal(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, - int *nparts, int *edgecut, idxtype *epart, idxtype *npart) +int METIS_PartMeshNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *vwgt, idx_t *vsize, idx_t *nparts, real_t *tpwgts, + idx_t *options, idx_t *objval, idx_t *epart, idx_t *npart) { - int i, j, k, me; - idxtype *xadj, *adjncy, *pwgts; - int options[10], pnumflag=0, wgtflag=0; - int nnbrs, nbrind[200], nbrwgt[200], maxpwgt; - int esize, esizes[] = {-1, 3, 4, 8, 4}; + int sigrval=0, renumber=0, ptype; + idx_t *xadj=NULL, *adjncy=NULL; + idx_t ncon=1, pnumflag=0; + int rstatus=METIS_OK; - esize = esizes[*etype]; + /* set up malloc cleaning code and signal catchers */ + if (!gk_malloc_init()) + return METIS_ERROR_MEMORY; - if (*numflag == 1) - ChangeMesh2CNumbering((*ne)*esize, elmnts); + gk_sigtrap(); - xadj = idxmalloc(*nn+1, "METIS_MESHPARTNODAL: xadj"); - adjncy = idxmalloc(20*(*nn), "METIS_MESHPARTNODAL: adjncy"); + if ((sigrval = gk_sigcatch()) != 0) + goto SIGTHROW; - METIS_MeshToNodal(ne, nn, elmnts, etype, &pnumflag, xadj, adjncy); + renumber = GETOPTION(options, METIS_OPTION_NUMBERING, 0); + ptype = GETOPTION(options, METIS_OPTION_PTYPE, METIS_PTYPE_KWAY); - adjncy = (idxtype *)realloc(adjncy, xadj[*nn]*sizeof(idxtype)); + /* renumber the mesh */ + if (renumber) { + ChangeMesh2CNumbering(*ne, eptr, eind); + options[METIS_OPTION_NUMBERING] = 0; + } - options[0] = 0; - METIS_PartGraphKway(nn, xadj, adjncy, NULL, NULL, &wgtflag, &pnumflag, nparts, options, edgecut, npart); + /* get the nodal graph */ + rstatus = METIS_MeshToNodal(ne, nn, eptr, eind, &pnumflag, &xadj, &adjncy); + if (rstatus != METIS_OK) + raise(SIGERR); - /* OK, now compute an element partition based on the nodal partition npart */ - idxset(*ne, -1, epart); - pwgts = idxsmalloc(*nparts, 0, "METIS_MESHPARTNODAL: pwgts"); - for (i=0; i<*ne; i++) { - me = 
npart[elmnts[i*esize]]; - for (j=1; j0; i--) - nptr[i] = nptr[i-1]; - nptr[0] = 0; - - - /* OK, now compute a nodal partition based on the element partition npart */ - idxset(*nn, -1, npart); - pwgts = idxsmalloc(*nparts, 0, "METIS_MESHPARTDUAL: pwgts"); - for (i=0; i<*nn; i++) { - me = epart[nind[nptr[i]]]; - for (j=nptr[i]+1; j 0); + + /* assign it first to the domain with most things in common */ + rpart[i] = nbrdom[iargmax(nnbrs, nbrwgt,1)]; + + /* if overweight, assign it to the light domain */ + if (pwgts[rpart[i]] > itpwgts[rpart[i]]) { for (j=0; j +#ifndef _METIS_H_ +#define _METIS_H_ + +/**************************************************************************** +* A set of defines that can be modified by the user +*****************************************************************************/ + +/*-------------------------------------------------------------------------- + Specifies the width of the elementary data type that will hold information + about vertices and their adjacency lists. + + Possible values: + 32 : Use 32 bit signed integers + 64 : Use 64 bit signed integers + + A width of 64 should be specified if the number of vertices or the total + number of edges in the graph exceed the limits of a 32 bit signed integer + i.e., 2^31-1. + Proper use of 64 bit integers requires that the c99 standard datatypes + int32_t and int64_t are supported by the compiler. + GCC does provides these definitions in stdint.h, but it may require some + modifications on other architectures. +--------------------------------------------------------------------------*/ +// IDXTYPEWIDTH must be set to 32, setting it to 64 causes a failure. +#define IDXTYPEWIDTH 32 + +/*-------------------------------------------------------------------------- + Specifies the data type that will hold floating-point style information. 
+ + Possible values: + 32 : single precision floating point (float) + 64 : double precision floating point (double) +--------------------------------------------------------------------------*/ +// REALTYPEWIDTH must be set to 32, setting it to 64 causes a failure. +#define REALTYPEWIDTH 32 + +/**************************************************************************** +* In principle, nothing needs to be changed beyond this point, unless the +* int32_t and int64_t cannot be found in the normal places. +*****************************************************************************/ + +/* Uniform definitions for various compilers */ +#if defined(_MSC_VER) + #define COMPILER_MSC +#endif +#if defined(__ICC) + #define COMPILER_ICC +#endif +#if defined(__GNUC__) + #define COMPILER_GCC +#endif + +/* Include c99 int definitions and need constants. When building the library, + * these are already defined by GKlib; hence the test for _GKLIB_H_ */ +#ifndef _GKLIB_H_ +#ifdef COMPILER_MSC +#include + +typedef __int32 int32_t; +typedef __int64 int64_t; +#define PRId32 "I32d" +#define PRId64 "I64d" +#define SCNd32 "ld" +#define SCNd64 "I64d" +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#else +#include +#endif +#endif + + +/*------------------------------------------------------------------------ +* Setup the basic datatypes +*-------------------------------------------------------------------------*/ +#if IDXTYPEWIDTH == 32 + typedef int32_t idx_t; + + #define IDX_MAX INT32_MAX + #define IDX_MIN INT32_MIN + + #define SCIDX SCNd32 + #define PRIDX PRId32 + + #define strtoidx strtol + #define iabs abs +#elif IDXTYPEWIDTH == 64 + typedef int64_t idx_t; + + #define IDX_MAX INT64_MAX + #define IDX_MIN INT64_MIN + + #define SCIDX SCNd64 + #define PRIDX PRId64 + +#ifdef COMPILER_MSC + #define strtoidx _strtoi64 +#else + #define strtoidx strtoll +#endif + #define iabs labs +#else + #error "Incorrect 
user-supplied value fo IDXTYPEWIDTH" +#endif + + +#if REALTYPEWIDTH == 32 + typedef float real_t; + + #define SCREAL "f" + #define PRREAL "f" + #define REAL_MAX FLT_MAX + #define REAL_MIN FLT_MIN + #define REAL_EPSILON FLT_EPSILON + + #define rabs fabsf + #define REALEQ(x,y) ((rabs((x)-(y)) <= FLT_EPSILON)) + +#ifdef COMPILER_MSC + #define strtoreal (float)strtod +#else + #define strtoreal strtof +#endif +#elif REALTYPEWIDTH == 64 + typedef double real_t; + + #define SCREAL "lf" + #define PRREAL "lf" + #define REAL_MAX DBL_MAX + #define REAL_MIN DBL_MIN + #define REAL_EPSILON DBL_EPSILON + + #define rabs fabs + #define REALEQ(x,y) ((rabs((x)-(y)) <= DBL_EPSILON)) + + #define strtoreal strtod +#else + #error "Incorrect user-supplied value for REALTYPEWIDTH" +#endif + + +/*------------------------------------------------------------------------ +* Constant definitions +*-------------------------------------------------------------------------*/ +/* Metis's version number */ +#define METIS_VER_MAJOR 5 +#define METIS_VER_MINOR 2 +#define METIS_VER_SUBMINOR 1 + +/* The maximum length of the options[] array */ +#define METIS_NOPTIONS 40 + + -#ifdef DMALLOC -#include +/*------------------------------------------------------------------------ +* Function prototypes +*-------------------------------------------------------------------------*/ + +#ifdef _WINDLL +#define METIS_API(type) __declspec(dllexport) type __cdecl +#elif defined(__cdecl) +#define METIS_API(type) type __cdecl +#else +#define METIS_API(type) type +#endif + + + +#ifdef __cplusplus +extern "C" { +#endif + +METIS_API(int) METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, + idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part); + +METIS_API(int) METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, + idx_t *nparts, real_t *tpwgts, 
real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part); + +METIS_API(int) METIS_MeshToDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *ncommon, idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy); + +METIS_API(int) METIS_MeshToNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy); + +METIS_API(int) METIS_PartMeshNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *vwgt, idx_t *vsize, idx_t *nparts, real_t *tpwgts, + idx_t *options, idx_t *objval, idx_t *epart, idx_t *npart); + +METIS_API(int) METIS_PartMeshDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *vwgt, idx_t *vsize, idx_t *ncommon, idx_t *nparts, + real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, + idx_t *npart); + +METIS_API(int) METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *options, idx_t *perm, idx_t *iperm); + +METIS_API(int) METIS_Free(void *ptr); + +METIS_API(int) METIS_SetDefaultOptions(idx_t *options); + + +/* These functions are used by ParMETIS */ + +METIS_API(int) METIS_NodeNDP(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t npes, idx_t *options, idx_t *perm, idx_t *iperm, + idx_t *sizes); + +METIS_API(int) METIS_ComputeVertexSeparator(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *options, idx_t *sepsize, idx_t *part); + +METIS_API(int) METIS_NodeRefine(idx_t nvtxs, idx_t *xadj, idx_t *vwgt, idx_t *adjncy, + idx_t *where, idx_t *hmarker, real_t ubfactor); + + +/* These functions are used by DGL */ + +METIS_API(int) METIS_CacheFriendlyReordering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *part, idx_t *old2new); + +#ifdef __cplusplus +} #endif -// moved this to metis_svfsi directory -// #include "../parmetis.h" /* Get the idxtype definition */ -#include /* Get the idxtype definition */ -#include -#include -#include -#include -#include + +/*------------------------------------------------------------------------ +* Enum type 
definitions +*-------------------------------------------------------------------------*/ +/*! Return codes */ +typedef enum { + METIS_OK = 1, /*!< Returned normally */ + METIS_ERROR_INPUT = -2, /*!< Returned due to erroneous inputs and/or options */ + METIS_ERROR_MEMORY = -3, /*!< Returned due to insufficient memory */ + METIS_ERROR = -4 /*!< Some other errors */ +} rstatus_et; + + +/*! Operation type codes */ +typedef enum { + METIS_OP_PMETIS, + METIS_OP_KMETIS, + METIS_OP_OMETIS +} moptype_et; + + +/*! Options codes (i.e., options[]) */ +typedef enum { + METIS_OPTION_PTYPE, + METIS_OPTION_OBJTYPE, + METIS_OPTION_CTYPE, + METIS_OPTION_IPTYPE, + METIS_OPTION_RTYPE, + METIS_OPTION_DBGLVL, + METIS_OPTION_NIPARTS, + METIS_OPTION_NITER, + METIS_OPTION_NCUTS, + METIS_OPTION_SEED, + METIS_OPTION_ONDISK, + METIS_OPTION_MINCONN, + METIS_OPTION_CONTIG, + METIS_OPTION_COMPRESS, + METIS_OPTION_CCORDER, + METIS_OPTION_PFACTOR, + METIS_OPTION_NSEPS, + METIS_OPTION_UFACTOR, + METIS_OPTION_NUMBERING, + METIS_OPTION_DROPEDGES, + METIS_OPTION_NO2HOP, + METIS_OPTION_TWOHOP, + METIS_OPTION_FAST, + + /* Used for command-line parameter purposes */ + METIS_OPTION_HELP, + METIS_OPTION_TPWGTS, + METIS_OPTION_NCOMMON, + METIS_OPTION_NOOUTPUT, + METIS_OPTION_BALANCE, + METIS_OPTION_GTYPE, + METIS_OPTION_UBVEC +} moptions_et; + + +/*! Partitioning Schemes */ +typedef enum { + METIS_PTYPE_RB, + METIS_PTYPE_KWAY +} mptype_et; + +/*! Graph types for meshes */ +typedef enum { + METIS_GTYPE_DUAL, + METIS_GTYPE_NODAL +} mgtype_et; + +/*! Coarsening Schemes */ +typedef enum { + METIS_CTYPE_RM, + METIS_CTYPE_SHEM +} mctype_et; + +/*! Initial partitioning schemes */ +typedef enum { + METIS_IPTYPE_GROW, + METIS_IPTYPE_RANDOM, + METIS_IPTYPE_EDGE, + METIS_IPTYPE_NODE, + METIS_IPTYPE_METISRB +} miptype_et; + + +/*! Refinement schemes */ +typedef enum { + METIS_RTYPE_FM, + METIS_RTYPE_GREEDY, + METIS_RTYPE_SEP2SIDED, + METIS_RTYPE_SEP1SIDED +} mrtype_et; + + +/*! 
Debug Levels */ +typedef enum { + METIS_DBG_INFO = 1, /*!< Shows various diagnostic messages */ + METIS_DBG_TIME = 2, /*!< Perform timing analysis */ + METIS_DBG_COARSEN = 4, /*!< Show the coarsening progress */ + METIS_DBG_REFINE = 8, /*!< Show the refinement progress */ + METIS_DBG_IPART = 16, /*!< Show info on initial partitioning */ + METIS_DBG_MOVEINFO = 32, /*!< Show info on vertex moves during refinement */ + METIS_DBG_SEPINFO = 64, /*!< Show info on vertex moves during sep refinement */ + METIS_DBG_CONNINFO = 128, /*!< Show info on minimization of subdomain connectivity */ + METIS_DBG_CONTIGINFO = 256, /*!< Show info on elimination of connected components */ + METIS_DBG_MEMORY = 2048 /*!< Show info related to wspace allocation */ +} mdbglvl_et; + + +/* Types of objectives */ +typedef enum { + METIS_OBJTYPE_CUT, + METIS_OBJTYPE_VOL, + METIS_OBJTYPE_NODE +} mobjtype_et; + + + +#endif /* _METIS_H_ */ diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/metislib.h b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/metislib.h new file mode 100644 index 00000000..dc224f42 --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/metislib.h @@ -0,0 +1,41 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * metis.h + * + * This file includes all necessary header files + * + * Started 8/27/94 + * George + * + * $Id: metislib.h 10655 2011-08-02 17:38:11Z benjamin $ + */ + +#ifndef _LIBMETIS_METISLIB_H_ +#define _LIBMETIS_METISLIB_H_ + +#include + +#if defined(ENABLE_OPENMP) + #include +#endif + + +#include +#include "rename.h" +#include "gklib_defs.h" + +#include "defs.h" +#include "struct.h" +#include "macros.h" +#include "proto.h" + + +#if defined(COMPILER_MSC) +#if defined(rint) + #undef rint +#endif +#define rint(x) ((idx_t)((x)+0.5)) /* MSC does not have rint() function */ +#endif + +#endif diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mfm.c 
b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mfm.c deleted file mode 100644 index d0047e5b..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mfm.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mfm.c - * - * This file contains code that implements the edge-based FM refinement - * - * Started 7/23/97 - * George - * - * $Id: mfm.c,v 1.1 2003/07/24 18:39:09 karypis Exp $ - */ - -#include - - -/************************************************************************* -* This function performs an edge-based FM refinement -**************************************************************************/ -void MocFM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, float *tpwgts, int npasses) -{ - int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum; - idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; - idxtype *moved, *swaps, *perm, *qnum; - float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; - PQueueType parts[MAXNCON][2]; - int higain, oldgain, mincut, initcut, newcut, mincutorder; - float rtpwgts[2]; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - id = graph->id; - ed = graph->ed; - npwgts = graph->npwgts; - bndptr = graph->bndptr; - bndind = graph->bndind; - - moved = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - qnum = idxwspacemalloc(ctrl, nvtxs); - - limit = amin(amax(0.01*nvtxs, 25), 150); - - /* Initialize the queues */ - for (i=0; idbglvl&DBG_REFINE) { - printf("Parts: ["); - for (l=0; lnvtxs, graph->nbnd, graph->mincut, origbal); - } - - idxset(nvtxs, -1, moved); - for (pass=0; passmincut; - for (i=0; imincut); - ASSERT(CheckBnd(graph)); - - /* Insert boundary nodes in the priority queues */ - nbnd = graph->nbnd; 
- RandomPermute(nbnd, perm, 1); - for (ii=0; ii 0 || id[i] == 0); - ASSERT(bndptr[i] != -1); - PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]); - } - - for (nswaps=0; nswaps limit) { /* We hit the limit, undo last move */ - newcut += (ed[higain]-id[higain]); - saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); - saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - break; - } - - where[higain] = to; - moved[higain] = nswaps; - swaps[nswaps] = higain; - - if (ctrl->dbglvl&DBG_MOVEINFO) { - printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); - for (l=0; l 0) { /* It will now become a boundary vertex */ - BNDInsert(nbnd, bndind, bndptr, k); - if (moved[k] == -1) - PQueueInsert(&parts[qnum[k]][where[k]], k, ed[k]-id[k]); - } - } - } - - } - - - /**************************************************************** - * Roll back computations - *****************************************************************/ - for (i=0; imincutorder; nswaps--) { - higain = swaps[nswaps]; - - to = where[higain] = (where[higain]+1)%2; - SWAP(id[higain], ed[higain], tmp); - if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) - BNDDelete(nbnd, bndind, bndptr, higain); - else if (ed[higain] > 0 && bndptr[higain] == -1) - BNDInsert(nbnd, bndind, bndptr, higain); - - saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); - for (j=xadj[higain]; j 0) - BNDInsert(nbnd, bndind, bndptr, k); - } - } - - if (ctrl->dbglvl&DBG_REFINE) { - printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); - for (l=0; lmincut = mincut; - graph->nbnd = nbnd; - - if (mincutorder == -1 || mincut == initcut) - break; - } - - for (i=0; i= maxdiff) { - maxdiff = npwgts[part*ncon+i]-tpwgts[part]; - *from = part; - *cnum = i; - } - } - } - - /* printf("Selected %d(%d) -> %d\n", *from, *cnum, 
PQueueGetSize(&queues[*cnum][*from])); */ - - if (*from != -1 && PQueueGetSize(&queues[*cnum][*from]) == 0) { - /* The desired queue is empty, select a node from that side anyway */ - for (i=0; i 0) { - max = npwgts[(*from)*ncon + i]; - *cnum = i; - break; - } - } - - for (i++; i max && PQueueGetSize(&queues[i][*from]) > 0) { - max = npwgts[(*from)*ncon + i]; - *cnum = i; - } - } - } - - /* Check to see if you can focus on the cut */ - if (maxdiff <= 0.0 || *from == -1) { - maxgain = -100000; - - for (part=0; part<2; part++) { - for (i=0; i 0 && PQueueGetKey(&queues[i][part]) > maxgain) { - maxgain = PQueueGetKey(&queues[i][part]); - *from = part; - *cnum = i; - } - } - } - } -} - - - - - -/************************************************************************* -* This function checks if the balance achieved is better than the diff -* For now, it uses a 2-norm measure -**************************************************************************/ -int BetterBalance(int ncon, float *npwgts, float *tpwgts, float *diff) -{ - int i; - float ndiff[MAXNCON]; - - for (i=0; i - - -/************************************************************************* -* This function performs an edge-based FM refinement -**************************************************************************/ -void MocFM_2WayEdgeRefine2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *orgubvec, - int npasses) -{ - int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum; - idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; - idxtype *moved, *swaps, *perm, *qnum; - float *nvwgt, *npwgts, origdiff[MAXNCON], origbal[MAXNCON], minbal[MAXNCON]; - PQueueType parts[MAXNCON][2]; - int higain, oldgain, mincut, initcut, newcut, mincutorder; - float *maxwgt, *minwgt, ubvec[MAXNCON], tvec[MAXNCON]; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = 
graph->where; - id = graph->id; - ed = graph->ed; - npwgts = graph->npwgts; - bndptr = graph->bndptr; - bndind = graph->bndind; - - moved = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - qnum = idxwspacemalloc(ctrl, nvtxs); - - limit = amin(amax(0.01*nvtxs, 15), 100); - - Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, origbal); - for (i=0; idbglvl&DBG_REFINE) { - printf("Parts: ["); - for (l=0; lnvtxs, graph->nbnd, graph->mincut); - for (i=0; imincut; - Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, minbal); - - ASSERT(ComputeCut(graph, where) == graph->mincut); - ASSERT(CheckBnd(graph)); - - /* Insert boundary nodes in the priority queues */ - nbnd = graph->nbnd; - RandomPermute(nbnd, perm, 1); - for (ii=0; ii 0 || id[i] == 0); - ASSERT(bndptr[i] != -1); - PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]); - } - - for (nswaps=0; nswaps limit) { /* We hit the limit, undo last move */ - newcut += (ed[higain]-id[higain]); - saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); - saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - break; - } - - where[higain] = to; - moved[higain] = nswaps; - swaps[nswaps] = higain; - - if (ctrl->dbglvl&DBG_MOVEINFO) { - printf("Moved %6d from %d(%d). 
Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); - for (l=0; l 0) { /* It will now become a boundary vertex */ - BNDInsert(nbnd, bndind, bndptr, k); - if (moved[k] == -1) - PQueueInsert(&parts[qnum[k]][where[k]], k, ed[k]-id[k]); - } - } - } - - } - - - /**************************************************************** - * Roll back computations - *****************************************************************/ - for (i=0; imincutorder; nswaps--) { - higain = swaps[nswaps]; - - to = where[higain] = (where[higain]+1)%2; - SWAP(id[higain], ed[higain], tmp); - if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) - BNDDelete(nbnd, bndind, bndptr, higain); - else if (ed[higain] > 0 && bndptr[higain] == -1) - BNDInsert(nbnd, bndind, bndptr, higain); - - saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); - for (j=xadj[higain]; j 0) - BNDInsert(nbnd, bndind, bndptr, k); - } - } - - if (ctrl->dbglvl&DBG_REFINE) { - printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); - for (l=0; lmincut = mincut; - graph->nbnd = nbnd; - - if (mincutorder == -1 || mincut == initcut) - break; - } - - for (i=0; i= maxdiff) { - maxdiff = diff; - *from = j; - *cnum = i; - } - } - } - - if (*from != -1 && PQueueGetSize(&queues[*cnum][*from]) == 0) { - /* The desired queue is empty, select a node from that side anyway */ - for (i=0; i 0) { - max = (npwgts[(*from)*ncon+i] - maxwgt[(*from)*ncon+i]); - *cnum = i; - break; - } - } - - for (i++; i max && PQueueGetSize(&queues[i][*from]) > 0) { - max = diff; - *cnum = i; - } - } - } - - /* Check to see if you can focus on the cut */ - if (maxdiff <= 0.0) { - maxgain = -100000; - - for (j=0; j<2; j++) { - for (i=0; i 0 && PQueueGetKey(&queues[i][j]) > maxgain) { - maxgain = PQueueGetKey(&queues[i][j]); - *from = j; - *cnum = i; - } - } - } - - /* printf("(%2d %2d) %3d\n", *from, *cnum, 
maxgain); */ - } -} - - -/************************************************************************* -* This function checks if the newbal is better than oldbal given the -* ubvector ubvec -**************************************************************************/ -int IsBetter2wayBalance(int ncon, float *newbal, float *oldbal, float *ubvec) -{ - int i, j; - float max1=0.0, max2=0.0, sum1=0.0, sum2=0.0, tmp; - - for (i=0; i max2) - return 0; - else - return sum1 <= sum2; -} - - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/minconn.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/minconn.c new file mode 100644 index 00000000..fce4fa1c --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/minconn.c @@ -0,0 +1,729 @@ +/*! +\file +\brief Functions that deal with prunning the number of adjacent subdomains in kmetis + +\date Started 7/15/98 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version $Id: minconn.c 17513 2014-08-05 16:20:50Z dominique $ +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! 
This function computes the subdomain graph storing the result in the + pre-allocated worspace arrays */ +/*************************************************************************/ +void ComputeSubDomainGraph(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, pid, other, nparts, nvtxs, nnbrs; + idx_t *xadj, *adjncy, *adjwgt, *where; + idx_t *pptr, *pind; + idx_t nads=0, *vadids, *vadwgts; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + where = graph->where; + + nparts = ctrl->nparts; + + vadids = ctrl->pvec1; + vadwgts = iset(nparts, 0, ctrl->pvec2); + + pptr = iwspacemalloc(ctrl, nparts+1); + pind = iwspacemalloc(ctrl, nvtxs); + iarray2csr(nvtxs, nparts, where, pptr, pind); + + for (pid=0; pidobjtype) { + case METIS_OBJTYPE_CUT: + { + ckrinfo_t *rinfo; + cnbr_t *nbrs; + + rinfo = graph->ckrinfo; + for (nads=0, ii=pptr[pid]; ii 0) { + nnbrs = rinfo[i].nnbrs; + nbrs = ctrl->cnbrpool + rinfo[i].inbr; + + for (j=0; jvkrinfo; + for (nads=0, ii=pptr[pid]; ii 0) { + nnbrs = rinfo[i].nnbrs; + nbrs = ctrl->vnbrpool + rinfo[i].inbr; + + for (j=0; jobjtype); + } + + /* See if you have enough memory to store the adjacent info for that subdomain */ + if (ctrl->maxnads[pid] < nads) { + ctrl->maxnads[pid] = 2*nads; + ctrl->adids[pid] = irealloc(ctrl->adids[pid], ctrl->maxnads[pid], + "ComputeSubDomainGraph: adids[pid]"); + ctrl->adwgts[pid] = irealloc(ctrl->adwgts[pid], ctrl->maxnads[pid], + "ComputeSubDomainGraph: adids[pid]"); + } + + ctrl->nads[pid] = nads; + for (j=0; jadids[pid][j] = vadids[j]; + ctrl->adwgts[pid][j] = vadwgts[vadids[j]]; + + vadwgts[vadids[j]] = 0; + } + } + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function updates the weight of an edge in the subdomain graph by + adding to it the value of ewgt. The update can either increase or + decrease the weight of the subdomain edge based on the value of ewgt. 
+ + \param u is the ID of one of the incident subdomains to the edge + \param v is the ID of the other incident subdomains to the edge + \param ewgt is the weight to be added to the subdomain edge + \param nparts is the number of subdomains + \param r_maxndoms is the maximum number of adjacent subdomains and is + updated as necessary. The update is skipped if a NULL value is + supplied. +*/ +/*************************************************************************/ +void UpdateEdgeSubDomainGraph(ctrl_t *ctrl, idx_t u, idx_t v, idx_t ewgt, + idx_t *r_maxndoms) +{ + idx_t i, j, nads; + + if (ewgt == 0) + return; + + for (i=0; i<2; i++) { + nads = ctrl->nads[u]; + /* Find the edge */ + for (j=0; jadids[u][j] == v) { + ctrl->adwgts[u][j] += ewgt; + break; + } + } + + if (j == nads) { + /* Deal with the case in which the edge was not found */ + ASSERT(ewgt > 0); + if (ctrl->maxnads[u] == nads) { + ctrl->maxnads[u] = 2*(nads+1); + ctrl->adids[u] = irealloc(ctrl->adids[u], ctrl->maxnads[u], + "IncreaseEdgeSubDomainGraph: adids[pid]"); + ctrl->adwgts[u] = irealloc(ctrl->adwgts[u], ctrl->maxnads[u], + "IncreaseEdgeSubDomainGraph: adids[pid]"); + } + ctrl->adids[u][nads] = v; + ctrl->adwgts[u][nads] = ewgt; + nads++; + if (r_maxndoms != NULL && nads > *r_maxndoms) { + printf("You just increased the maxndoms: %"PRIDX" %"PRIDX"\n", + nads, *r_maxndoms); + *r_maxndoms = nads; + } + } + else { + /* See if the updated edge becomes 0 */ + ASSERT(ctrl->adwgts[u][j] >= 0); + if (ctrl->adwgts[u][j] == 0) { + ctrl->adids[u][j] = ctrl->adids[u][nads-1]; + ctrl->adwgts[u][j] = ctrl->adwgts[u][nads-1]; + nads--; + if (r_maxndoms != NULL && nads+1 == *r_maxndoms) + *r_maxndoms = ctrl->nads[iargmax(ctrl->nparts, ctrl->nads,1)]; + } + } + ctrl->nads[u] = nads; + + SWAP(u, v, j); + } +} + + +/*************************************************************************/ +/*! 
This function computes the subdomain graph */ +/*************************************************************************/ +void EliminateSubDomainEdges(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, k, ncon, nparts, scheme, pid_from, pid_to, me, other, nvtxs, + total, max, avg, totalout, nind=0, ncand=0, ncand2, target, target2, + nadd, bestnadd=0; + idx_t min, move, *cpwgt; + idx_t *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *maxpwgt, + *mypmat, *otherpmat, *kpmat, *ind; + idx_t *nads, **adids, **adwgts; + ikv_t *cand, *cand2; + ipq_t queue; + real_t *tpwgts, badfactor=1.4; + idx_t *pptr, *pind; + idx_t *vmarker=NULL, *pmarker=NULL, *modind=NULL; /* volume specific work arrays */ + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + adjncy = graph->adjncy; + vwgt = graph->vwgt; + adjwgt = (ctrl->objtype == METIS_OBJTYPE_VOL ? NULL : graph->adjwgt); + + where = graph->where; + pwgts = graph->pwgts; /* We assume that this is properly initialized */ + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + cpwgt = iwspacemalloc(ctrl, ncon); + maxpwgt = iwspacemalloc(ctrl, nparts*ncon); + ind = iwspacemalloc(ctrl, nvtxs); + otherpmat = iset(nparts, 0, iwspacemalloc(ctrl, nparts)); + + cand = ikvwspacemalloc(ctrl, nparts); + cand2 = ikvwspacemalloc(ctrl, nparts); + + pptr = iwspacemalloc(ctrl, nparts+1); + pind = iwspacemalloc(ctrl, nvtxs); + iarray2csr(nvtxs, nparts, where, pptr, pind); + + if (ctrl->objtype == METIS_OBJTYPE_VOL) { + /* Vol-refinement specific working arrays */ + modind = iwspacemalloc(ctrl, nvtxs); + vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + } + + + /* Compute the pmat matrix and ndoms */ + ComputeSubDomainGraph(ctrl, graph); + + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; + + mypmat = iset(nparts, 0, ctrl->pvec1); + kpmat = iset(nparts, 0, ctrl->pvec2); + + /* Compute the maximum allowed weight for each domain */ + 
for (i=0; itvwgt[j]*ctrl->ubfactors[j]; + } + + ipqInit(&queue, nparts); + + /* Get into the loop eliminating subdomain connections */ + while (1) { + total = isum(nparts, nads, 1); + avg = total/nparts; + max = nads[iargmax(nparts, nads,1)]; + + IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, + printf("Adjacent Subdomain Stats: Total: %3"PRIDX", " + "Max: %3"PRIDX"[%zu], Avg: %3"PRIDX"\n", + total, max, iargmax(nparts, nads,1), avg)); + + if (max < badfactor*avg) + break; + + /* Add the subdomains that you will try to reduce their connectivity */ + ipqReset(&queue); + for (i=0; i= avg + (max-avg)/2) + ipqInsert(&queue, i, nads[i]); + } + + move = 0; + while ((me = ipqGetTop(&queue)) != -1) { + totalout = isum(nads[me], adwgts[me], 1); + + for (ncand2=0, i=0; idbglvl, METIS_DBG_CONNINFO, + printf("Me: %"PRIDX", Degree: %4"PRIDX", TotalOut: %"PRIDX",\n", + me, nads[me], totalout)); + + /* Sort the connections according to their cut */ + ikvsorti(ncand2, cand2); + + /* Two schemes are used for eliminating subdomain edges. + The first, tries to eliminate subdomain edges by moving remote groups + of vertices to subdomains that 'me' is already connected to. + The second, tries to eliminate subdomain edges by moving entire sets of + my vertices that connect to the 'other' subdomain to a subdomain that + I'm already connected to. + These two schemes are applied in sequence. */ + target = target2 = -1; + for (scheme=0; scheme<2; scheme++) { + for (min=0; min 0); + } + + ikvsortd(ncand, cand); + + IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, + printf("\tMinOut: %4"PRIDX", to: %3"PRIDX", TtlWgt: %5"PRIDX"[#:%"PRIDX"]\n", + mypmat[other], other, isum(ncon, cpwgt, 1), nind)); + + /* Go through and select the first domain that is common with 'me', and does + not increase the nads[target] higher than nads[me], subject to the maxpwgt + constraint. Traversal is done from the mostly connected to the least. 
*/ + for (i=0; i 0) { + /* Check if balance will go off */ + if (!ivecaxpylez(ncon, 1, cpwgt, pwgts+k*ncon, maxpwgt+k*ncon)) + continue; + + /* get a dense vector out of k's connectivity */ + for (j=0; j 0 && kpmat[j] == 0 && nads[j]+1 >= nads[me]) + break; + } + + /* There were no bad second level effects. See if you can find a + subdomain to move to. */ + if (j == nparts) { + for (nadd=0, j=0; j 0 && kpmat[j] == 0) + nadd++; + } + + IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, + printf("\t\tto=%"PRIDX", nadd=%"PRIDX", %"PRIDX"\n", k, nadd, nads[k])); + + if (nads[k]+nadd < nads[me]) { + if (target2 == -1 || nads[target2]+bestnadd > nads[k]+nadd || + (nads[target2]+bestnadd == nads[k]+nadd && bestnadd > nadd)) { + target2 = k; + bestnadd = nadd; + } + } + + if (nadd == 0) + target = k; + } + + /* reset kpmat for the next iteration */ + for (j=0; jdbglvl, METIS_DBG_CONNINFO, + printf("\t\tScheme: %"PRIDX". Moving to %"PRIDX"\n", scheme, target)); + move = 1; + break; + } + } + + if (target != -1) + break; /* A move was found. No need to try the other scheme */ + } + + /* reset the mypmat for next iteration */ + for (i=0; iobjtype) { + case METIS_OBJTYPE_CUT: + MoveGroupMinConnForCut(ctrl, graph, target, nind, ind); + break; + case METIS_OBJTYPE_VOL: + MoveGroupMinConnForVol(ctrl, graph, target, nind, ind, vmarker, + pmarker, modind); + break; + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); + } + + /* Update the csr representation of the partitioning vector */ + iarray2csr(nvtxs, nparts, where, pptr, pind); + } + } + + if (move == 0) + break; + } + + ipqFree(&queue); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! 
This function moves a collection of vertices and updates their rinfo */ +/*************************************************************************/ +void MoveGroupMinConnForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, + idx_t *ind) +{ + idx_t i, ii, j, jj, k, l, nvtxs, nbnd, from, me; + idx_t *xadj, *adjncy, *adjwgt, *where, *bndptr, *bndind; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; + + nbnd = graph->nbnd; + + while (--nind>=0) { + i = ind[nind]; + from = where[i]; + + myrinfo = graph->ckrinfo+i; + if (myrinfo->inbr == -1) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + /* find the location of 'to' in myrinfo or create it if it is not there */ + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) + break; + } + if (k == myrinfo->nnbrs) { + ASSERT(k < xadj[i+1]-xadj[i]); + mynbrs[k].pid = to; + mynbrs[k].ed = 0; + myrinfo->nnbrs++; + } + + /* Update pwgts */ + iaxpy(graph->ncon, 1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon, 1); + iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1); + + /* Update mincut */ + graph->mincut -= mynbrs[k].ed-myrinfo->id; + + /* Update subdomain connectivity graph to reflect the move of 'i' */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, NULL); + + /* Update ID/ED and BND related information for the moved vertex */ + UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, nbnd, + bndptr, bndind, BNDTYPE_REFINE); + + /* Update the degrees of adjacent vertices */ + for (j=xadj[i]; jckrinfo+ii; + + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, BNDTYPE_REFINE); + + /* Update subdomain graph to reflect the move of 
'i' for domains other + than 'from' and 'to' */ + if (me != from && me != to) { + UpdateEdgeSubDomainGraph(ctrl, from, me, -adjwgt[j], NULL); + UpdateEdgeSubDomainGraph(ctrl, to, me, adjwgt[j], NULL); + } + } + } + + ASSERT(ComputeCut(graph, where) == graph->mincut); + + graph->nbnd = nbnd; + +} + + +/*************************************************************************/ +/*! This function moves a collection of vertices and updates their rinfo */ +/*************************************************************************/ +void MoveGroupMinConnForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, + idx_t *ind, idx_t *vmarker, idx_t *pmarker, idx_t *modind) +{ + idx_t i, ii, j, jj, k, l, nvtxs, from, me, other, xgain, ewgt; + idx_t *xadj, *vsize, *adjncy, *where; + vkrinfo_t *myrinfo, *orinfo; + vnbr_t *mynbrs, *onbrs; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vsize = graph->vsize; + adjncy = graph->adjncy; + where = graph->where; + + while (--nind>=0) { + i = ind[nind]; + from = where[i]; + + myrinfo = graph->vkrinfo+i; + if (myrinfo->inbr == -1) { + myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? vsize[i] : 0); + + //printf("Moving %"PRIDX" from %"PRIDX" to %"PRIDX" [vsize: %"PRIDX"] [xgain: %"PRIDX"]\n", + // i, from, to, vsize[i], xgain); + + /* find the location of 'to' in myrinfo or create it if it is not there */ + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) + break; + } + + if (k == myrinfo->nnbrs) { + //printf("Missing neighbor\n"); + + if (myrinfo->nid > 0) + xgain -= vsize[i]; + + /* determine the volume gain resulting from that move */ + for (j=xadj[i]; jvkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + ASSERT(other != to) + + //printf(" %8d %8d %3d\n", (int)ii, (int)vsize[ii], (int)other); + + if (from == other) { + /* Same subdomain vertex: Decrease the gain if 'to' is a new neighbor. 
*/ + for (l=0; lnnbrs; l++) { + if (onbrs[l].pid == to) + break; + } + if (l == orinfo->nnbrs) + xgain -= vsize[ii]; + } + else { + /* Remote vertex: increase if 'to' is a new subdomain */ + for (l=0; lnnbrs; l++) { + if (onbrs[l].pid == to) + break; + } + if (l == orinfo->nnbrs) + xgain -= vsize[ii]; + + /* Remote vertex: decrease if i is the only connection to 'from' */ + for (l=0; lnnbrs; l++) { + if (onbrs[l].pid == from && onbrs[l].ned == 1) { + xgain += vsize[ii]; + break; + } + } + } + } + graph->minvol -= xgain; + graph->mincut -= -myrinfo->nid; + ewgt = myrinfo->nid; + } + else { + graph->minvol -= (xgain + mynbrs[k].gv); + graph->mincut -= mynbrs[k].ned-myrinfo->nid; + ewgt = myrinfo->nid-mynbrs[k].ned; + } + + /* Update where and pwgts */ + where[i] = to; + iaxpy(graph->ncon, 1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon, 1); + iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1); + + /* Update subdomain connectivity graph to reflect the move of 'i' */ + UpdateEdgeSubDomainGraph(ctrl, from, to, ewgt, NULL); + + /* Update the subdomain connectivity of the adjacent vertices */ + for (j=xadj[i]; jmincut); + ASSERTP(ComputeVolume(graph, where) == graph->minvol, + ("%"PRIDX" %"PRIDX"\n", ComputeVolume(graph, where), graph->minvol)); + +} + + +/*************************************************************************/ +/*! This function computes the subdomain graph. For deubugging purposes. 
*/ +/*************************************************************************/ +void PrintSubDomainGraph(graph_t *graph, idx_t nparts, idx_t *where) +{ + idx_t i, j, k, me, nvtxs, total, max; + idx_t *xadj, *adjncy, *adjwgt, *pmat; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + pmat = ismalloc(nparts*nparts, 0, "ComputeSubDomainGraph: pmat"); + + for (i=0; i 0) + k++; + } + total += k; + + if (k > max) + max = k; +/* + printf("%2"PRIDX" -> %2"PRIDX" ", i, k); + for (j=0; j 0) + printf("[%2"PRIDX" %4"PRIDX"] ", j, pmat[i*nparts+j]); + } + printf("\n"); +*/ + } + printf("Total adjacent subdomains: %"PRIDX", Max: %"PRIDX"\n", total, max); + + gk_free((void **)&pmat, LTERM); +} + + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mincover.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mincover.c index ffa9069e..fce78647 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mincover.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mincover.c @@ -8,10 +8,10 @@ * Started 8/1/97 * George * - * $Id: mincover.c,v 1.1 2003/07/16 15:55:09 karypis Exp $ + * $Id: mincover.c 9942 2011-05-17 22:09:52Z karypis $ */ -#include +#include "metislib.h" /************************************************************************* * Constants used by mincover algorithm @@ -39,18 +39,18 @@ * cover : the actual cover (array) * csize : the size of the cover **************************************************************************/ -void MinCover(idxtype *xadj, idxtype *adjncy, int asize, int bsize, idxtype *cover, int *csize) +void MinCover(idx_t *xadj, idx_t *adjncy, idx_t asize, idx_t bsize, idx_t *cover, idx_t *csize) { - int i, j; - idxtype *mate, *queue, *flag, *level, *lst; - int fptr, rptr, lstptr; - int row, maxlevel, col; + idx_t i, j; + idx_t *mate, *queue, *flag, *level, *lst; + idx_t fptr, rptr, lstptr; + idx_t row, maxlevel, col; - mate = 
idxsmalloc(bsize, -1, "MinCover: mate"); - flag = idxmalloc(bsize, "MinCover: flag"); - level = idxmalloc(bsize, "MinCover: level"); - queue = idxmalloc(bsize, "MinCover: queue"); - lst = idxmalloc(bsize, "MinCover: lst"); + mate = ismalloc(bsize, -1, "MinCover: mate"); + flag = imalloc(bsize, "MinCover: flag"); + level = imalloc(bsize, "MinCover: level"); + queue = imalloc(bsize, "MinCover: queue"); + lst = imalloc(bsize, "MinCover: lst"); /* Get a cheap matching */ for (i=0; i - -/************************************************************************* -* This function computes the initial bisection of the coarsest graph -**************************************************************************/ -void MocInit2WayPartition(CtrlType *ctrl, GraphType *graph, float *tpwgts, float ubfactor) -{ - int i, dbglvl; - - dbglvl = ctrl->dbglvl; - IFSET(ctrl->dbglvl, DBG_REFINE, ctrl->dbglvl -= DBG_REFINE); - IFSET(ctrl->dbglvl, DBG_MOVEINFO, ctrl->dbglvl -= DBG_MOVEINFO); - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); - - switch (ctrl->IType) { - case IPART_GGPKL: - if (graph->nedges == 0) - MocRandomBisection(ctrl, graph, tpwgts, ubfactor); - else - MocGrowBisection(ctrl, graph, tpwgts, ubfactor); - break; - case IPART_RANDOM: - MocRandomBisection(ctrl, graph, tpwgts, ubfactor); - break; - default: - errexit("Unknown initial partition type: %d\n", ctrl->IType); - } - - IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial Cut: %d\n", graph->mincut)); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); - ctrl->dbglvl = dbglvl; - -} - - - - - -/************************************************************************* -* This function takes a graph and produces a bisection by using a region -* growing algorithm. 
The resulting partition is returned in -* graph->where -**************************************************************************/ -void MocGrowBisection(CtrlType *ctrl, GraphType *graph, float *tpwgts, float ubfactor) -{ - int i, j, k, nvtxs, ncon, from, bestcut, mincut, nbfs; - idxtype *bestwhere, *where; - - nvtxs = graph->nvtxs; - - MocAllocate2WayPartitionMemory(ctrl, graph); - where = graph->where; - - bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere"); - nbfs = 2*(nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS); - bestcut = idxsum(graph->nedges, graph->adjwgt); - - for (; nbfs>0; nbfs--) { - idxset(nvtxs, 1, where); - where[RandomInRange(nvtxs)] = 0; - - MocCompute2WayPartitionParams(ctrl, graph); - - MocInit2WayBalance(ctrl, graph, tpwgts); - - MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 4); - - MocBalance2Way(ctrl, graph, tpwgts, 1.02); - MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 4); - - if (bestcut > graph->mincut) { - bestcut = graph->mincut; - idxcopy(nvtxs, where, bestwhere); - if (bestcut == 0) - break; - } - } - - graph->mincut = bestcut; - idxcopy(nvtxs, bestwhere, where); - - GKfree((void **)(&bestwhere), LTERM); -} - - - -/************************************************************************* -* This function takes a graph and produces a bisection by using a region -* growing algorithm. The resulting partition is returned in -* graph->where -**************************************************************************/ -void MocRandomBisection(CtrlType *ctrl, GraphType *graph, float *tpwgts, float ubfactor) -{ - int i, ii, j, k, nvtxs, ncon, from, bestcut, mincut, nbfs, qnum; - idxtype *bestwhere, *where, *perm; - int counts[MAXNCON]; - float *nvwgt; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - nvwgt = graph->nvwgt; - - MocAllocate2WayPartitionMemory(ctrl, graph); - where = graph->where; - - bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere"); - nbfs = 2*(nvtxs <= ctrl->CoarsenTo ? 
SMALLNIPARTS : LARGENIPARTS); - bestcut = idxsum(graph->nedges, graph->adjwgt); - perm = idxmalloc(nvtxs, "BisectGraph: perm"); - - for (; nbfs>0; nbfs--) { - for (i=0; imincut); - for (i=0; incon; i++) - printf("(%.3f %.3f) ", graph->npwgts[i], graph->npwgts[graph->ncon+i]); - printf("]\n"); - */ - - if (bestcut > graph->mincut) { - bestcut = graph->mincut; - idxcopy(nvtxs, where, bestwhere); - if (bestcut == 0) - break; - } - } - - graph->mincut = bestcut; - idxcopy(nvtxs, bestwhere, where); - - GKfree((void **)(&bestwhere), &perm, LTERM); -} - - - - -/************************************************************************* -* This function balances two partitions by moving the highest gain -* (including negative gain) vertices to the other domain. -* It is used only when tha unbalance is due to non contigous -* subdomains. That is, the are no boundary vertices. -* It moves vertices from the domain that is overweight to the one that -* is underweight. -**************************************************************************/ -void MocInit2WayBalance(CtrlType *ctrl, GraphType *graph, float *tpwgts) -{ - int i, ii, j, k, l, kwgt, nvtxs, nbnd, ncon, nswaps, from, to, pass, me, cnum, tmp; - idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; - idxtype *perm, *qnum; - float *nvwgt, *npwgts; - PQueueType parts[MAXNCON][2]; - int higain, oldgain, mincut; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - adjncy = graph->adjncy; - nvwgt = graph->nvwgt; - adjwgt = graph->adjwgt; - where = graph->where; - id = graph->id; - ed = graph->ed; - npwgts = graph->npwgts; - bndptr = graph->bndptr; - bndind = graph->bndind; - - perm = idxwspacemalloc(ctrl, nvtxs); - qnum = idxwspacemalloc(ctrl, nvtxs); - - /* This is called for initial partitioning so we know from where to pick nodes */ - from = 1; - to = (from+1)%2; - - if (ctrl->dbglvl&DBG_REFINE) { - printf("Parts: ["); - for (l=0; lnvtxs, graph->nbnd, graph->mincut, - 
Compute2WayHLoadImbalance(ncon, npwgts, tpwgts)); - } - - for (i=0; imincut); - ASSERT(CheckBnd(graph)); - ASSERT(CheckGraph(graph)); - - /* Compute the queues in which each vertex will be assigned to */ - for (i=0; i 0) - PQueueInsert(&parts[qnum[i]][0], i, ed[i]-id[i]); - else - PQueueInsert(&parts[qnum[i]][1], i, ed[i]-id[i]); - } - } - - - mincut = graph->mincut; - nbnd = graph->nbnd; - for (nswaps=0; nswapsdbglvl&DBG_MOVEINFO) { - printf("Moved %6d from %d(%d). [%5d] %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], mincut); - for (l=0; l 0) - printf("\t Pulled from the interior!\n"); - } - - - /************************************************************** - * Update the id[i]/ed[i] values of the affected nodes - ***************************************************************/ - SWAP(id[higain], ed[higain], tmp); - if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) - BNDDelete(nbnd, bndind, bndptr, higain); - if (ed[higain] > 0 && bndptr[higain] == -1) - BNDInsert(nbnd, bndind, bndptr, higain); - - for (j=xadj[higain]; j 0 && bndptr[k] == -1) { /* It moves in boundary */ - PQueueDelete(&parts[qnum[k]][1], k, oldgain); - PQueueInsert(&parts[qnum[k]][0], k, ed[k]-id[k]); - } - else { /* It must be in the boundary already */ - if (bndptr[k] == -1) - printf("What you thought was wrong!\n"); - PQueueUpdate(&parts[qnum[k]][0], k, oldgain, ed[k]-id[k]); - } - } - - /* Update its boundary information */ - if (ed[k] == 0 && bndptr[k] != -1) - BNDDelete(nbnd, bndind, bndptr, k); - else if (ed[k] > 0 && bndptr[k] == -1) - BNDInsert(nbnd, bndind, bndptr, k); - } - - ASSERTP(ComputeCut(graph, where) == mincut, ("%d != %d\n", ComputeCut(graph, where), mincut)); - - } - - if (ctrl->dbglvl&DBG_REFINE) { - printf("\tMincut: %6d, NBND: %6d, NPwgts: ", mincut, nbnd); - for (l=0; lmincut = mincut; - graph->nbnd = nbnd; - - for (i=0; imincut); - ASSERT(CheckBnd(graph)); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - - 
-/************************************************************************* -* This function selects the partition number and the queue from which -* we will move vertices out -**************************************************************************/ -int SelectQueueOneWay(int ncon, float *npwgts, float *tpwgts, int from, PQueueType queues[MAXNCON][2]) -{ - int i, cnum=-1; - float max=0.0; - - for (i=0; i= max && - PQueueGetSize(&queues[i][0]) + PQueueGetSize(&queues[i][1]) > 0) { - max = npwgts[from*ncon+i]-tpwgts[0]; - cnum = i; - } - } - - return cnum; -} - - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/minitpart2.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/minitpart2.c deleted file mode 100644 index 2b208440..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/minitpart2.c +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * minitpart2.c - * - * This file contains code that performs the initial partition of the - * coarsest graph - * - * Started 7/23/97 - * George - * - * $Id: minitpart2.c,v 1.1 2003/07/16 15:55:10 karypis Exp $ - * - */ - -#include - -/************************************************************************* -* This function computes the initial bisection of the coarsest graph -**************************************************************************/ -void MocInit2WayPartition2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec) -{ - int dbglvl; - - dbglvl = ctrl->dbglvl; - IFSET(ctrl->dbglvl, DBG_REFINE, ctrl->dbglvl -= DBG_REFINE); - IFSET(ctrl->dbglvl, DBG_MOVEINFO, ctrl->dbglvl -= DBG_MOVEINFO); - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); - - switch (ctrl->IType) { - case IPART_GGPKL: - case IPART_RANDOM: - MocGrowBisection2(ctrl, graph, tpwgts, ubvec); - break; - case 3: - MocGrowBisectionNew2(ctrl, graph, tpwgts, ubvec); - break; - default: - errexit("Unknown initial partition 
type: %d\n", ctrl->IType); - } - - IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial Cut: %d\n", graph->mincut)); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); - ctrl->dbglvl = dbglvl; - -} - - - - -/************************************************************************* -* This function takes a graph and produces a bisection by using a region -* growing algorithm. The resulting partition is returned in -* graph->where -**************************************************************************/ -void MocGrowBisection2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec) -{ - int i, j, k, nvtxs, ncon, from, bestcut, mincut, nbfs; - idxtype *bestwhere, *where; - - nvtxs = graph->nvtxs; - - MocAllocate2WayPartitionMemory(ctrl, graph); - where = graph->where; - - bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere"); - nbfs = 2*(nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS); - bestcut = idxsum(graph->nedges, graph->adjwgt); - - for (; nbfs>0; nbfs--) { - idxset(nvtxs, 1, where); - where[RandomInRange(nvtxs)] = 0; - - MocCompute2WayPartitionParams(ctrl, graph); - - MocBalance2Way2(ctrl, graph, tpwgts, ubvec); - - MocFM_2WayEdgeRefine2(ctrl, graph, tpwgts, ubvec, 4); - - MocBalance2Way2(ctrl, graph, tpwgts, ubvec); - MocFM_2WayEdgeRefine2(ctrl, graph, tpwgts, ubvec, 4); - - if (bestcut > graph->mincut) { - bestcut = graph->mincut; - idxcopy(nvtxs, where, bestwhere); - if (bestcut == 0) - break; - } - } - - graph->mincut = bestcut; - idxcopy(nvtxs, bestwhere, where); - - GKfree((void **)(&bestwhere), LTERM); -} - - - - - - -/************************************************************************* -* This function takes a graph and produces a bisection by using a region -* growing algorithm. 
The resulting partition is returned in -* graph->where -**************************************************************************/ -void MocGrowBisectionNew2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec) -{ - int i, j, k, nvtxs, ncon, from, bestcut, mincut, nbfs; - idxtype *bestwhere, *where; - - nvtxs = graph->nvtxs; - - MocAllocate2WayPartitionMemory(ctrl, graph); - where = graph->where; - - bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere"); - nbfs = 2*(nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS); - bestcut = idxsum(graph->nedges, graph->adjwgt); - - for (; nbfs>0; nbfs--) { - idxset(nvtxs, 1, where); - where[RandomInRange(nvtxs)] = 0; - - MocCompute2WayPartitionParams(ctrl, graph); - - MocInit2WayBalance2(ctrl, graph, tpwgts, ubvec); - - MocFM_2WayEdgeRefine2(ctrl, graph, tpwgts, ubvec, 4); - - if (bestcut > graph->mincut) { - bestcut = graph->mincut; - idxcopy(nvtxs, where, bestwhere); - if (bestcut == 0) - break; - } - } - - graph->mincut = bestcut; - idxcopy(nvtxs, bestwhere, where); - - GKfree((void **)&bestwhere, LTERM); -} - - - -/************************************************************************* -* This function balances two partitions by moving the highest gain -* (including negative gain) vertices to the other domain. -* It is used only when tha unbalance is due to non contigous -* subdomains. That is, the are no boundary vertices. -* It moves vertices from the domain that is overweight to the one that -* is underweight. 
-**************************************************************************/ -void MocInit2WayBalance2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec) -{ - int i, ii, j, k, l, kwgt, nvtxs, nbnd, ncon, nswaps, from, to, pass, me, cnum, tmp, imin; - idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; - idxtype *moved, *perm, *qnum; - float *nvwgt, *npwgts, minwgt; - PQueueType parts[MAXNCON][2]; - int higain, oldgain, mincut; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - adjncy = graph->adjncy; - nvwgt = graph->nvwgt; - adjwgt = graph->adjwgt; - where = graph->where; - id = graph->id; - ed = graph->ed; - npwgts = graph->npwgts; - bndptr = graph->bndptr; - bndind = graph->bndind; - - moved = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - qnum = idxwspacemalloc(ctrl, nvtxs); - - /* This is called for initial partitioning so we know from where to pick nodes */ - from = 1; - to = (from+1)%2; - - if (ctrl->dbglvl&DBG_REFINE) { - printf("Parts: ["); - for (l=0; lnvtxs, graph->nbnd, graph->mincut, ComputeLoadImbalance(ncon, 2, npwgts, tpwgts)); - } - - for (i=0; imincut); - ASSERT(CheckBnd(graph)); - ASSERT(CheckGraph(graph)); - - /* Compute the queues in which each vertex will be assigned to */ - for (i=0; i 0) - PQueueInsert(&parts[qnum[i]][0], i, ed[i]-id[i]); - else - PQueueInsert(&parts[qnum[i]][1], i, ed[i]-id[i]); - } - } - -/* - for (i=0; imincut; - nbnd = graph->nbnd; - for (nswaps=0; nswaps minwgt) - break; - - if ((cnum = SelectQueueOneWay2(ncon, npwgts+to*ncon, parts, ubvec)) == -1) - break; - - if ((higain = PQueueGetMax(&parts[cnum][0])) == -1) - higain = PQueueGetMax(&parts[cnum][1]); - - mincut -= (ed[higain]-id[higain]); - saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); - - where[higain] = to; - moved[higain] = nswaps; - - if (ctrl->dbglvl&DBG_MOVEINFO) { - printf("Moved %6d from %d(%d). 
[%5d] %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], mincut); - for (l=0; l 0) - printf("\t Pulled from the interior!\n"); - } - - - /************************************************************** - * Update the id[i]/ed[i] values of the affected nodes - ***************************************************************/ - SWAP(id[higain], ed[higain], tmp); - if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) - BNDDelete(nbnd, bndind, bndptr, higain); - if (ed[higain] > 0 && bndptr[higain] == -1) - BNDInsert(nbnd, bndind, bndptr, higain); - - for (j=xadj[higain]; j 0 && bndptr[k] == -1) { /* It moves in boundary */ - PQueueDelete(&parts[qnum[k]][1], k, oldgain); - PQueueInsert(&parts[qnum[k]][0], k, ed[k]-id[k]); - } - else { /* It must be in the boundary already */ - if (bndptr[k] == -1) - printf("What you thought was wrong!\n"); - PQueueUpdate(&parts[qnum[k]][0], k, oldgain, ed[k]-id[k]); - } - } - - /* Update its boundary information */ - if (ed[k] == 0 && bndptr[k] != -1) - BNDDelete(nbnd, bndind, bndptr, k); - else if (ed[k] > 0 && bndptr[k] == -1) - BNDInsert(nbnd, bndind, bndptr, k); - } - - ASSERTP(ComputeCut(graph, where) == mincut, ("%d != %d\n", ComputeCut(graph, where), mincut)); - - } - - if (ctrl->dbglvl&DBG_REFINE) { - printf("\tMincut: %6d, NBND: %6d, NPwgts: ", mincut, nbnd); - for (l=0; lmincut = mincut; - graph->nbnd = nbnd; - - for (i=0; imincut); - ASSERT(CheckBnd(graph)); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function selects the partition number and the queue from which -* we will move vertices out -**************************************************************************/ -int SelectQueueOneWay2(int ncon, float *pto, PQueueType queues[MAXNCON][2], float *ubvec) -{ - int i, cnum=-1, imax, maxgain; - float max=0.0; - float twgt[MAXNCON]; - - for (i=0; i 0 || 
PQueueGetSize(&queues[i][1]) > 0)) { - max = twgt[i]; - cnum = i; - } - } - if (max > 1) - return cnum; - - /* optimize of cut */ - maxgain = -10000000; - for (i=0; i 0 && PQueueGetKey(&queues[i][0]) > maxgain) { - maxgain = PQueueGetKey(&queues[i][0]); - cnum = i; - } - } - - return cnum; - -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkmetis.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkmetis.c deleted file mode 100644 index c51bf375..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkmetis.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mkmetis.c - * - * This file contains the top level routines for the multilevel k-way partitioning - * algorithm KMETIS. - * - * Started 7/28/97 - * George - * - * $Id: mkmetis.c,v 1.1 2003/07/16 15:55:10 karypis Exp $ - * - */ - -#include - - - -/************************************************************************* -* This function is the entry point for KWMETIS -**************************************************************************/ -void METIS_mCPartGraphKway(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, - idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, - int *nparts, float *rubvec, int *options, int *edgecut, - idxtype *part) -{ - int i, j; - GraphType graph; - CtrlType ctrl; - - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - - SetUpGraph(&graph, OP_KMETIS, *nvtxs, *ncon, xadj, adjncy, vwgt, adjwgt, *wgtflag); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = McKMETIS_CTYPE; - ctrl.IType = McKMETIS_ITYPE; - ctrl.RType = McKMETIS_RTYPE; - ctrl.dbglvl = McKMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - ctrl.optype = OP_KMETIS; - ctrl.CoarsenTo = amax((*nvtxs)/(20*log2Int(*nparts)), 
30*(*nparts)); - - ctrl.nmaxvwgt = 1.5/(1.0*ctrl.CoarsenTo); - - InitRandom(-1); - - AllocateWorkSpace(&ctrl, &graph, *nparts); - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - ASSERT(CheckGraph(&graph)); - *edgecut = MCMlevelKWayPartitioning(&ctrl, &graph, *nparts, part, rubvec); - - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); - - FreeWorkSpace(&ctrl, &graph); - - if (*numflag == 1) - Change2FNumbering(*nvtxs, xadj, adjncy, part); -} - - -/************************************************************************* -* This function takes a graph and produces a bisection of it -**************************************************************************/ -int MCMlevelKWayPartitioning(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, - float *rubvec) -{ - int i, j, nvtxs; - GraphType *cgraph; - int options[10], edgecut; - - cgraph = MCCoarsen2Way(ctrl, graph); - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); - MocAllocateKWayPartitionMemory(ctrl, cgraph, nparts); - - options[0] = 1; - options[OPTION_CTYPE] = MATCH_SBHEM_INFNORM; - options[OPTION_ITYPE] = IPART_RANDOM; - options[OPTION_RTYPE] = RTYPE_FM; - options[OPTION_DBGLVL] = 0; - - /* Determine what you will use as the initial partitioner, based on tolerances */ - for (i=0; incon; i++) { - if (rubvec[i] > 1.2) - break; - } - if (i == graph->ncon) - METIS_mCPartGraphRecursiveInternal(&cgraph->nvtxs, &cgraph->ncon, - cgraph->xadj, cgraph->adjncy, cgraph->nvwgt, cgraph->adjwgt, &nparts, - options, &edgecut, cgraph->where); - else - METIS_mCHPartGraphRecursiveInternal(&cgraph->nvtxs, &cgraph->ncon, - cgraph->xadj, cgraph->adjncy, cgraph->nvwgt, cgraph->adjwgt, &nparts, - rubvec, options, &edgecut, cgraph->where); - - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); - IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial %d-way partitioning cut: %d\n", 
nparts, edgecut)); - - IFSET(ctrl->dbglvl, DBG_KWAYPINFO, ComputePartitionInfo(cgraph, nparts, cgraph->where)); - - MocRefineKWayHorizontal(ctrl, graph, cgraph, nparts, rubvec); - - idxcopy(graph->nvtxs, graph->where, part); - - GKfree((void **)(&graph->nvwgt), &graph->gdata, &graph->rdata, LTERM); - - return graph->mincut; - -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkwayfmh.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkwayfmh.c deleted file mode 100644 index 69a781a4..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkwayfmh.c +++ /dev/null @@ -1,677 +0,0 @@ -/* - * mkwayfmh.c - * - * This file contains code that implements the multilevel k-way refinement - * - * Started 7/28/97 - * George - * - * $Id: mkwayfmh.c,v 1.1 2003/07/16 15:55:10 karypis Exp $ - * - */ - -#include - - - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void MCRandom_KWayEdgeRefineHorizontal(CtrlType *ctrl, GraphType *graph, int nparts, - float *orgubvec, int npasses) -{ - int i, ii, iii, j, jj, k, l, pass, nvtxs, ncon, nmoves, nbnd, myndegrees, same; - int from, me, to, oldcut, gain; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *perm, *bndptr, *bndind; - EDegreeType *myedegrees; - RInfoType *myrinfo; - float *npwgts, *nvwgt, *minwgt, *maxwgt, maxlb, minlb, ubvec[MAXNCON], tvec[MAXNCON]; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - bndptr = graph->bndptr; - bndind = graph->bndind; - - where = graph->where; - npwgts = graph->npwgts; - - /* Setup the weight intervals of the various subdomains */ - minwgt = fwspacemalloc(ctrl, nparts*ncon); - maxwgt = fwspacemalloc(ctrl, nparts*ncon); - - /* See if the orgubvec consists of identical constraints */ - maxlb = minlb = 
orgubvec[0]; - for (i=1; i maxlb ? orgubvec[i] : maxlb); - } - same = (fabs(maxlb-minlb) < .01 ? 1 : 0); - - - /* Let's not get very optimistic. Let Balancing do the work */ - ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec); - for (i=0; i maxlb ? ubvec[i] : maxlb); - - for (i=0; idbglvl&DBG_REFINE) { - printf("Partitions: [%5.4f %5.4f], Nv-Nb[%6d %6d]. Cut: %6d, LB: ", - npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], - graph->nvtxs, graph->nbnd, graph->mincut); - ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec); - for (i=0; imincut); - - oldcut = graph->mincut; - nbnd = graph->nbnd; - - RandomPermute(nbnd, perm, 1); - for (nmoves=iii=0; iiinbnd; iii++) { - ii = perm[iii]; - if (ii >= nbnd) - continue; - i = bndind[ii]; - - myrinfo = graph->rinfo+i; - - if (myrinfo->ed >= myrinfo->id) { /* Total ED is too high */ - from = where[i]; - nvwgt = graph->nvwgt+i*ncon; - - if (myrinfo->id > 0 && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) - continue; /* This cannot be moved! 
*/ - - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - - for (k=0; kid; - if (gain >= 0 && - (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || - IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))) - break; - } - if (k == myndegrees) - continue; /* break out if you did not find a candidate */ - - for (j=k+1; j myedegrees[k].ed && - (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || - IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))) || - (myedegrees[j].ed == myedegrees[k].ed && - IsHBalanceBetterTT(ncon, nparts, npwgts+myedegrees[k].pid*ncon, npwgts+to*ncon, nvwgt, ubvec))) - k = j; - } - - to = myedegrees[k].pid; - - if (myedegrees[k].ed-myrinfo->id == 0 - && !IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec) - && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, npwgts+from*ncon, maxwgt+from*ncon)) - continue; - - /*===================================================================== - * If we got here, we can now move the vertex from 'from' to 'to' - *======================================================================*/ - graph->mincut -= myedegrees[k].ed-myrinfo->id; - - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. 
Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut)); - - /* Update where, weight, and ID/ED information of the vertex you moved */ - saxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1); - saxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1); - where[i] = to; - myrinfo->ed += myrinfo->id-myedegrees[k].ed; - SWAP(myrinfo->id, myedegrees[k].ed, j); - if (myedegrees[k].ed == 0) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].pid = from; - - if (myrinfo->ed-myrinfo->id < 0) - BNDDelete(nbnd, bndind, bndptr, i); - - /* Update the degrees of adjacent vertices */ - for (j=xadj[i]; jrinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; - - ASSERT(CheckRInfo(myrinfo)); - - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - - if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1) - BNDInsert(nbnd, bndind, bndptr, ii); - } - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - - if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1) - BNDDelete(nbnd, bndind, bndptr, ii); - } - - /* Remove contribution from the .ed of 'from' */ - if (me != from) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == from) { - if (myedegrees[k].ed == adjwgt[j]) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].ed -= adjwgt[j]; - break; - } - } - } - - /* Add contribution to the .ed of 'to' */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } - } - - ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]); - ASSERT(CheckRInfo(myrinfo)); - - } - nmoves++; - } - } - - graph->nbnd = nbnd; - - if (ctrl->dbglvl&DBG_REFINE) { - printf("\t [%5.4f %5.4f], Nb: %6d, Nmoves: %5d, 
Cut: %6d, LB: ", - npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], - nbnd, nmoves, graph->mincut); - ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec); - for (i=0; imincut == oldcut) - break; - } - - fwspacefree(ctrl, ncon*nparts); - fwspacefree(ctrl, ncon*nparts); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void MCGreedy_KWayEdgeBalanceHorizontal(CtrlType *ctrl, GraphType *graph, int nparts, - float *ubvec, int npasses) -{ - int i, ii, iii, j, jj, k, l, pass, nvtxs, ncon, nbnd, myndegrees, oldgain, gain, nmoves; - int from, me, to, oldcut; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *perm, *bndptr, *bndind, *moved; - EDegreeType *myedegrees; - RInfoType *myrinfo; - PQueueType queue; - float *npwgts, *nvwgt, *minwgt, *maxwgt, tvec[MAXNCON]; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - bndind = graph->bndind; - bndptr = graph->bndptr; - - where = graph->where; - npwgts = graph->npwgts; - - /* Setup the weight intervals of the various subdomains */ - minwgt = fwspacemalloc(ctrl, ncon*nparts); - maxwgt = fwspacemalloc(ctrl, ncon*nparts); - - for (i=0; iadjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]); - - if (ctrl->dbglvl&DBG_REFINE) { - printf("Partitions: [%5.4f %5.4f], Nv-Nb[%6d %6d]. 
Cut: %6d, LB: ", - npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], - graph->nvtxs, graph->nbnd, graph->mincut); - ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec); - for (i=0; imincut); - - /* Check to see if things are out of balance, given the tolerance */ - if (MocIsHBalanced(ncon, nparts, npwgts, ubvec)) - break; - - PQueueReset(&queue); - idxset(nvtxs, -1, moved); - - oldcut = graph->mincut; - nbnd = graph->nbnd; - - RandomPermute(nbnd, perm, 1); - for (ii=0; iirinfo[i].ed - graph->rinfo[i].id); - moved[i] = 2; - } - - nmoves = 0; - for (;;) { - if ((i = PQueueGetMax(&queue)) == -1) - break; - moved[i] = 1; - - myrinfo = graph->rinfo+i; - from = where[i]; - nvwgt = graph->nvwgt+i*ncon; - - if (AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) - continue; /* This cannot be moved! */ - - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - - for (k=0; kid >= 0) - j++; - if (!AreAllHVwgtsAbove(ncon, 1.0, npwgts+to*ncon, 0.0, nvwgt, minwgt+to*ncon) && - AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon)) - j++; - if (j == 0) - continue; - -/* DELETE - if (myedegrees[k].ed-myrinfo->id < 0 && - AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, maxwgt+from*ncon) && - AreAllHVwgtsAbove(ncon, 1.0, npwgts+to*ncon, 0.0, nvwgt, minwgt+to*ncon) && - AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon)) - continue; -*/ - /*===================================================================== - * If we got here, we can now move the vertex from 'from' to 'to' - *======================================================================*/ - graph->mincut -= myedegrees[k].ed-myrinfo->id; - - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. 
Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut)); - - /* Update where, weight, and ID/ED information of the vertex you moved */ - saxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1); - saxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1); - where[i] = to; - myrinfo->ed += myrinfo->id-myedegrees[k].ed; - SWAP(myrinfo->id, myedegrees[k].ed, j); - if (myedegrees[k].ed == 0) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].pid = from; - - if (myrinfo->ed == 0) - BNDDelete(nbnd, bndind, bndptr, i); - - /* Update the degrees of adjacent vertices */ - for (j=xadj[i]; jrinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; - - ASSERT(CheckRInfo(myrinfo)); - - oldgain = (myrinfo->ed-myrinfo->id); - - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - - if (myrinfo->ed > 0 && bndptr[ii] == -1) - BNDInsert(nbnd, bndind, bndptr, ii); - } - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - - if (myrinfo->ed == 0 && bndptr[ii] != -1) - BNDDelete(nbnd, bndind, bndptr, ii); - } - - /* Remove contribution from the .ed of 'from' */ - if (me != from) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == from) { - if (myedegrees[k].ed == adjwgt[j]) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].ed -= adjwgt[j]; - break; - } - } - } - - /* Add contribution to the .ed of 'to' */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } - } - - - /* Update the queue */ - if (me == to || me == from) { - gain = myrinfo->ed-myrinfo->id; - if (moved[ii] == 2) { - if (myrinfo->ed > 0) - PQueueUpdate(&queue, ii, oldgain, gain); - else { - 
PQueueDelete(&queue, ii, oldgain); - moved[ii] = -1; - } - } - else if (moved[ii] == -1 && myrinfo->ed > 0) { - PQueueInsert(&queue, ii, gain); - moved[ii] = 2; - } - } - - ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]); - ASSERT(CheckRInfo(myrinfo)); - } - nmoves++; - } - - graph->nbnd = nbnd; - - if (ctrl->dbglvl&DBG_REFINE) { - printf("\t [%5.4f %5.4f], Nb: %6d, Nmoves: %5d, Cut: %6d, LB: ", - npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], - nbnd, nmoves, graph->mincut); - ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec); - for (i=0; i limit[i]) - return 0; - - return 1; -} - - - -/************************************************************************* -* This function checks if the vertex weights of two vertices are above -* a given set of values -**************************************************************************/ -int AreAllHVwgtsAbove(int ncon, float alpha, float *vwgt1, float beta, float *vwgt2, float *limit) -{ - int i; - - for (i=0; i max) - max = npwgts[j*ncon+i]; - } - - lbvec[i] = max*nparts; - } -} - - -/************************************************************************* -* This function determines if a partitioning is horizontally balanced -**************************************************************************/ -int MocIsHBalanced(int ncon, int nparts, float *npwgts, float *ubvec) -{ - int i, j; - float max; - - for (i=0; i max) - max = npwgts[j*ncon+i]; - } - - if (ubvec[i] < max*nparts) - return 0; - } - - return 1; -} - - - - - -/************************************************************************* -* This function checks if the pairwise balance of the between the two -* partitions will improve by moving the vertex v from pfrom to pto, -* subject to the target partition weights of tfrom, and tto respectively -**************************************************************************/ -int IsHBalanceBetterFT(int ncon, int nparts, float *pfrom, float *pto, float *vwgt, float *ubvec) -{ - int i, 
j, k; - float blb1=0.0, alb1=0.0, sblb=0.0, salb=0.0; - float blb2=0.0, alb2=0.0; - float temp; - - for (i=0; i m11) - return 0; - if (m22 < m12) - return 1; - if (m22 > m12) - return 0; - - return sm2 < sm1; -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkwayrefine.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkwayrefine.c deleted file mode 100644 index 15836db8..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mkwayrefine.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mkwayrefine.c - * - * This file contains the driving routines for multilevel k-way refinement - * - * Started 7/28/97 - * George - * - * $Id: mkwayrefine.c,v 1.1 2003/07/16 15:55:11 karypis Exp $ - */ - -#include - - -/************************************************************************* -* This function is the entry point of refinement -**************************************************************************/ -void MocRefineKWayHorizontal(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, int nparts, - float *ubvec) -{ - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); - - /* Compute the parameters of the coarsest graph */ - MocComputeKWayPartitionParams(ctrl, graph, nparts); - - for (;;) { - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RefTmr)); - - if (!MocIsHBalanced(graph->ncon, nparts, graph->npwgts, ubvec)) { - MocComputeKWayBalanceBoundary(ctrl, graph, nparts); - MCGreedy_KWayEdgeBalanceHorizontal(ctrl, graph, nparts, ubvec, 4); - ComputeKWayBoundary(ctrl, graph, nparts); - } - - MCRandom_KWayEdgeRefineHorizontal(ctrl, graph, nparts, ubvec, 10); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->RefTmr)); - - if (graph == orggraph) - break; - - graph = graph->finer; - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); - MocProjectKWayPartition(ctrl, graph, nparts); - IFSET(ctrl->dbglvl, DBG_TIME, 
stoptimer(ctrl->ProjectTmr)); - } - - if (!MocIsHBalanced(graph->ncon, nparts, graph->npwgts, ubvec)) { - MocComputeKWayBalanceBoundary(ctrl, graph, nparts); - MCGreedy_KWayEdgeBalanceHorizontal(ctrl, graph, nparts, ubvec, 4); - ComputeKWayBoundary(ctrl, graph, nparts); - MCRandom_KWayEdgeRefineHorizontal(ctrl, graph, nparts, ubvec, 10); - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); -} - - - - -/************************************************************************* -* This function allocates memory for k-way edge refinement -**************************************************************************/ -void MocAllocateKWayPartitionMemory(CtrlType *ctrl, GraphType *graph, int nparts) -{ - int nvtxs, ncon, pad64; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - - pad64 = (3*nvtxs+nparts)%2; - - graph->rdata = idxmalloc(3*nvtxs+ncon*nparts+(sizeof(RInfoType)/sizeof(idxtype))*nvtxs+pad64, "AllocateKWayPartitionMemory: rdata"); - graph->npwgts = (float *)graph->rdata; - graph->where = graph->rdata + ncon*nparts; - graph->bndptr = graph->rdata + nvtxs + ncon*nparts; - graph->bndind = graph->rdata + 2*nvtxs + ncon*nparts; - graph->rinfo = (RInfoType *)(graph->rdata + 3*nvtxs+ncon*nparts + pad64); -} - - -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void MocComputeKWayPartitionParams(CtrlType *ctrl, GraphType *graph, int nparts) -{ - int i, j, k, l, nvtxs, ncon, nbnd, mincut, me, other; - idxtype *xadj, *adjncy, *adjwgt, *where, *bndind, *bndptr; - RInfoType *rinfo, *myrinfo; - EDegreeType *myedegrees; - float *nvwgt, *npwgts; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - where = graph->where; - npwgts = sset(ncon*nparts, 0.0, graph->npwgts); - bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, 
graph->bndptr); - rinfo = graph->rinfo; - - - /*------------------------------------------------------------ - / Compute now the id/ed degrees - /------------------------------------------------------------*/ - ctrl->wspace.cdegree = 0; - nbnd = mincut = 0; - for (i=0; iid = myrinfo->ed = myrinfo->ndegrees = 0; - myrinfo->edegrees = NULL; - - for (j=xadj[i]; jed += adjwgt[j]; - } - myrinfo->id = graph->adjwgtsum[i] - myrinfo->ed; - - if (myrinfo->ed > 0) - mincut += myrinfo->ed; - - if (myrinfo->ed-myrinfo->id >= 0) - BNDInsert(nbnd, bndind, bndptr, i); - - /* Time to compute the particular external degrees */ - if (myrinfo->ed > 0) { - myedegrees = myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[i+1]-xadj[i]; - - for (j=xadj[i]; jndegrees; k++) { - if (myedegrees[k].pid == other) { - myedegrees[k].ed += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = other; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } - } - } - - ASSERT(myrinfo->ndegrees <= xadj[i+1]-xadj[i]); - } - } - - graph->mincut = mincut/2; - graph->nbnd = nbnd; - -} - - - -/************************************************************************* -* This function projects a partition, and at the same time computes the -* parameters for refinement. 
-**************************************************************************/ -void MocProjectKWayPartition(CtrlType *ctrl, GraphType *graph, int nparts) -{ - int i, j, k, nvtxs, nbnd, me, other, istart, iend, ndegrees; - idxtype *xadj, *adjncy, *adjwgt, *adjwgtsum; - idxtype *cmap, *where, *bndptr, *bndind; - idxtype *cwhere; - GraphType *cgraph; - RInfoType *crinfo, *rinfo, *myrinfo; - EDegreeType *myedegrees; - idxtype *htable; - - cgraph = graph->coarser; - cwhere = cgraph->where; - crinfo = cgraph->rinfo; - - nvtxs = graph->nvtxs; - cmap = graph->cmap; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; - - MocAllocateKWayPartitionMemory(ctrl, graph, nparts); - where = graph->where; - rinfo = graph->rinfo; - bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); - - /* Go through and project partition and compute id/ed for the nodes */ - for (i=0; iwspace.cdegree = 0; - for (nbnd=0, i=0; iid = myrinfo->ed = myrinfo->ndegrees = 0; - myrinfo->edegrees = NULL; - - myrinfo->id = adjwgtsum[i]; - - if (cmap[i] > 0) { /* If it is an interface node. 
Note cmap[i] = crinfo[cmap[i]].ed */ - istart = xadj[i]; - iend = xadj[i+1]; - - myedegrees = myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += iend-istart; - - ndegrees = 0; - for (j=istart; jed += adjwgt[j]; - if ((k = htable[other]) == -1) { - htable[other] = ndegrees; - myedegrees[ndegrees].pid = other; - myedegrees[ndegrees++].ed = adjwgt[j]; - } - else { - myedegrees[k].ed += adjwgt[j]; - } - } - } - myrinfo->id -= myrinfo->ed; - - /* Remove space for edegrees if it was interior */ - if (myrinfo->ed == 0) { - myrinfo->edegrees = NULL; - ctrl->wspace.cdegree -= iend-istart; - } - else { - if (myrinfo->ed-myrinfo->id >= 0) - BNDInsert(nbnd, bndind, bndptr, i); - - myrinfo->ndegrees = ndegrees; - - for (j=0; jncon*nparts, cgraph->npwgts, graph->npwgts); - graph->mincut = cgraph->mincut; - graph->nbnd = nbnd; - - FreeGraph(graph->coarser); - graph->coarser = NULL; - - idxwspacefree(ctrl, nparts); - - ASSERT(CheckBnd2(graph)); - -} - - - -/************************************************************************* -* This function computes the boundary definition for balancing -**************************************************************************/ -void MocComputeKWayBalanceBoundary(CtrlType *ctrl, GraphType *graph, int nparts) -{ - int i, nvtxs, nbnd; - idxtype *bndind, *bndptr; - - nvtxs = graph->nvtxs; - bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); - - - /* Compute the new boundary */ - nbnd = 0; - for (i=0; irinfo[i].ed > 0) - BNDInsert(nbnd, bndind, bndptr, i); - } - - graph->nbnd = nbnd; -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mmatch.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mmatch.c deleted file mode 100644 index 2666dd3d..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mmatch.c +++ /dev/null @@ -1,501 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mmatch.c - * - * This 
file contains the code that computes matchings and creates the next - * level coarse graph. - * - * Started 7/23/97 - * George - * - * $Id: mmatch.c,v 1.1 2003/07/16 15:55:11 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void MCMatch_RM(CtrlType *ctrl, GraphType *graph) -{ - int i, ii, j, k, nvtxs, ncon, cnvtxs, maxidx; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *match, *cmap, *perm; - float *nvwgt; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - cmap = graph->cmap; - match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); - - perm = idxwspacemalloc(ctrl, nvtxs); - RandomPermute(nvtxs, perm, 1); - - cnvtxs = 0; - for (ii=0; iinmaxvwgt)) { - maxidx = k; - break; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); - - CreateCoarseGraph(ctrl, graph, cnvtxs, match, perm); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void MCMatch_HEM(CtrlType *ctrl, GraphType *graph) -{ - int i, ii, j, k, l, nvtxs, cnvtxs, ncon, maxidx, maxwgt; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *match, *cmap, *perm; - float *nvwgt; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - cmap = graph->cmap; - match = 
idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); - - perm = idxwspacemalloc(ctrl, nvtxs); - RandomPermute(nvtxs, perm, 1); - - cnvtxs = 0; - for (ii=0; iinmaxvwgt)) { - maxwgt = adjwgt[j]; - maxidx = adjncy[j]; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); - - CreateCoarseGraph(ctrl, graph, cnvtxs, match, perm); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void MCMatch_SHEM(CtrlType *ctrl, GraphType *graph) -{ - int i, ii, j, k, nvtxs, cnvtxs, ncon, maxidx, maxwgt, avgdegree; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *match, *cmap, *degrees, *perm, *tperm; - float *nvwgt; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - cmap = graph->cmap; - match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); - - perm = idxwspacemalloc(ctrl, nvtxs); - tperm = idxwspacemalloc(ctrl, nvtxs); - degrees = idxwspacemalloc(ctrl, nvtxs); - - RandomPermute(nvtxs, tperm, 1); - avgdegree = 0.7*(xadj[nvtxs]/nvtxs); - for (i=0; i avgdegree ? avgdegree : xadj[i+1]-xadj[i]); - BucketSortKeysInc(nvtxs, avgdegree, degrees, tperm, perm); - - cnvtxs = 0; - - /* Take care any islands. 
Islands are matched with non-islands due to coarsening */ - for (ii=0; iiii; j--) { - k = perm[j]; - if (match[k] == UNMATCHED && xadj[k] < xadj[k+1]) { - maxidx = k; - break; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - /* Continue with normal matching */ - for (; iinmaxvwgt)) { - maxwgt = adjwgt[j]; - maxidx = adjncy[j]; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); - - idxwspacefree(ctrl, nvtxs); /* degrees */ - idxwspacefree(ctrl, nvtxs); /* tperm */ - - CreateCoarseGraph(ctrl, graph, cnvtxs, match, perm); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void MCMatch_SHEBM(CtrlType *ctrl, GraphType *graph, int norm) -{ - int i, ii, j, k, nvtxs, cnvtxs, ncon, maxidx, maxwgt, avgdegree; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *match, *cmap, *degrees, *perm, *tperm; - float *nvwgt; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - cmap = graph->cmap; - match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); - - perm = idxwspacemalloc(ctrl, nvtxs); - tperm = idxwspacemalloc(ctrl, nvtxs); - degrees = idxwspacemalloc(ctrl, nvtxs); - - RandomPermute(nvtxs, tperm, 1); - avgdegree = 0.7*(xadj[nvtxs]/nvtxs); - for (i=0; i avgdegree ? avgdegree : xadj[i+1]-xadj[i]); - BucketSortKeysInc(nvtxs, avgdegree, degrees, tperm, perm); - - cnvtxs = 0; - - /* Take care any islands. 
Islands are matched with non-islands due to coarsening */ - for (ii=0; iiii; j--) { - k = perm[j]; - if (match[k] == UNMATCHED && xadj[k] < xadj[k+1]) { - maxidx = k; - break; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - /* Continue with normal matching */ - for (; iinmaxvwgt) && - (maxwgt < adjwgt[j] || - (maxwgt == adjwgt[j] && - BetterVBalance(ncon, norm, nvwgt+i*ncon, nvwgt+maxidx*ncon, nvwgt+k*ncon) >= 0 - ) - ) - ) { - maxwgt = adjwgt[j]; - maxidx = k; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); - - idxwspacefree(ctrl, nvtxs); /* degrees */ - idxwspacefree(ctrl, nvtxs); /* tperm */ - - CreateCoarseGraph(ctrl, graph, cnvtxs, match, perm); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function finds a matching using the HEM heuristic -**************************************************************************/ -void MCMatch_SBHEM(CtrlType *ctrl, GraphType *graph, int norm) -{ - int i, ii, j, k, nvtxs, cnvtxs, ncon, maxidx, maxwgt, avgdegree; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *match, *cmap, *degrees, *perm, *tperm; - float *nvwgt, vbal; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - cmap = graph->cmap; - match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); - - perm = idxwspacemalloc(ctrl, nvtxs); - tperm = idxwspacemalloc(ctrl, nvtxs); - degrees = idxwspacemalloc(ctrl, nvtxs); - - RandomPermute(nvtxs, tperm, 1); - avgdegree = 0.7*(xadj[nvtxs]/nvtxs); - for (i=0; i avgdegree ? 
avgdegree : xadj[i+1]-xadj[i]); - BucketSortKeysInc(nvtxs, avgdegree, degrees, tperm, perm); - - cnvtxs = 0; - - /* Take care any islands. Islands are matched with non-islands due to coarsening */ - for (ii=0; iiii; j--) { - k = perm[j]; - if (match[k] == UNMATCHED && xadj[k] < xadj[k+1]) { - maxidx = k; - break; - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - /* Continue with normal matching */ - for (; iinmaxvwgt)) { - if (maxidx != i) - vbal = BetterVBalance(ncon, norm, nvwgt+i*ncon, nvwgt+maxidx*ncon, nvwgt+k*ncon); - - if (vbal > 0 || (vbal > -.01 && maxwgt < adjwgt[j])) { - maxwgt = adjwgt[j]; - maxidx = k; - } - } - } - - cmap[i] = cmap[maxidx] = cnvtxs++; - match[i] = maxidx; - match[maxidx] = i; - } - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); - - idxwspacefree(ctrl, nvtxs); /* degrees */ - idxwspacefree(ctrl, nvtxs); /* tperm */ - - CreateCoarseGraph(ctrl, graph, cnvtxs, match, perm); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - - - - -/************************************************************************* -* This function checks if v+u2 provides a better balance in the weight -* vector that v+u1 -**************************************************************************/ -float BetterVBalance(int ncon, int norm, float *vwgt, float *u1wgt, float *u2wgt) -{ - int i; - float sum1, sum2, max1, max2, min1, min2, diff1, diff2; - - if (norm == -1) { - max1 = min1 = vwgt[0]+u1wgt[0]; - max2 = min2 = vwgt[0]+u2wgt[0]; - sum1 = vwgt[0]+u1wgt[0]; - sum2 = vwgt[0]+u2wgt[0]; - - for (i=1; i vwgt[i]+u1wgt[i]) - min1 = vwgt[i]+u1wgt[i]; - - if (max2 < vwgt[i]+u2wgt[i]) - max2 = vwgt[i]+u2wgt[i]; - if (min2 > vwgt[i]+u2wgt[i]) - min2 = vwgt[i]+u2wgt[i]; - - sum1 += vwgt[i]+u1wgt[i]; - sum2 += vwgt[i]+u2wgt[i]; - } - - return ((max1-min1)/sum1) - ((max2-min2)/sum2); - } - else if (norm == 1) { - sum1 = sum2 = 0.0; - for (i=0; i limit) - return 0; - - return 1; -} - diff 
--git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mmd.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mmd.c index 1b436181..c67b06e7 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mmd.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mmd.c @@ -16,10 +16,10 @@ * The meaning of invperm, and perm vectors is different from that * in genqmd_ of SparsPak * - * $Id: mmd.c,v 1.1 2003/07/16 15:55:11 karypis Exp $ + * $Id: mmd.c 22385 2019-06-03 22:08:48Z karypis $ */ -#include +#include "metislib.h" /************************************************************************* @@ -50,33 +50,33 @@ * marker -- a temporary marker vector. * Subroutines used -- mmdelm, mmdint, mmdnum, mmdupd. **************************************************************************/ -void genmmd(int neqns, idxtype *xadj, idxtype *adjncy, idxtype *invp, idxtype *perm, - int delta, idxtype *head, idxtype *qsize, idxtype *list, idxtype *marker, - int maxint, int *ncsub) +void genmmd(idx_t neqns, idx_t *xadj, idx_t *adjncy, idx_t *invp, idx_t *perm, + idx_t delta, idx_t *head, idx_t *qsize, idx_t *list, idx_t *marker, + idx_t maxint, idx_t *ncsub) { - int ehead, i, mdeg, mdlmt, mdeg_node, nextmd, num, tag; + idx_t ehead, i, mdeg, mdlmt, mdeg_node, nextmd, num, tag; if (neqns <= 0) return; - /* Adjust from C to Fortran */ + /* adjust from C to Fortran */ xadj--; adjncy--; invp--; perm--; head--; qsize--; list--; marker--; - /* initialization for the minimum degree algorithm. */ + /* initialization for the minimum degree algorithm */ *ncsub = 0; mmdint(neqns, xadj, adjncy, head, invp, perm, qsize, list, marker); - /* 'num' counts the number of ordered nodes plus 1. */ + /* 'num' counts the number of ordered nodes plus 1 */ num = 1; - /* eliminate all isolated nodes. 
*/ + /* eliminate all isolated nodes */ nextmd = head[1]; while (nextmd > 0) { mdeg_node = nextmd; nextmd = invp[mdeg_node]; marker[mdeg_node] = maxint; invp[mdeg_node] = -num; - num = num + 1; + num++; } /* search for node of the minimum degree. 'mdeg' is the current */ @@ -87,14 +87,16 @@ void genmmd(int neqns, idxtype *xadj, idxtype *adjncy, idxtype *invp, idxtype *p head[1] = 0; mdeg = 2; - /* infinite loop here ! */ + /* infinite loop here */ while (1) { while (head[mdeg] <= 0) mdeg++; /* use value of 'delta' to set up 'mdlmt', which governs */ /* when a degree update is to be performed. */ - mdlmt = mdeg + delta; + //mdlmt = mdeg + delta; + // the need for gk_min() was identified by jsf67 + mdlmt = gk_min(neqns, mdeg+delta); ehead = 0; n500: @@ -107,7 +109,7 @@ void genmmd(int neqns, idxtype *xadj, idxtype *adjncy, idxtype *invp, idxtype *p mdeg_node = head[mdeg]; }; - /* remove 'mdeg_node' from the degree structure. */ + /* remove 'mdeg_node' from the degree structure */ nextmd = invp[mdeg_node]; head[mdeg] = nextmd; if (nextmd > 0) @@ -140,7 +142,7 @@ void genmmd(int neqns, idxtype *xadj, idxtype *adjncy, idxtype *invp, idxtype *p /* minimum degree nodes elimination. */ if (num > neqns) goto n1000; - mmdupd( ehead, neqns, xadj, adjncy, delta, &mdeg, head, invp, perm, qsize, list, marker, maxint, &tag); + mmdupd(ehead, neqns, xadj, adjncy, delta, &mdeg, head, invp, perm, qsize, list, marker, maxint, &tag); }; /* end of -- while ( 1 ) -- */ n1000: @@ -168,10 +170,10 @@ void genmmd(int neqns, idxtype *xadj, idxtype *adjncy, idxtype *invp, idxtype *p * marker -- marker vector. * list -- temporary linked list of eliminated nabors. 
***************************************************************************/ -void mmdelm(int mdeg_node, idxtype *xadj, idxtype *adjncy, idxtype *head, idxtype *forward, - idxtype *backward, idxtype *qsize, idxtype *list, idxtype *marker, int maxint,int tag) +void mmdelm(idx_t mdeg_node, idx_t *xadj, idx_t *adjncy, idx_t *head, idx_t *forward, + idx_t *backward, idx_t *qsize, idx_t *list, idx_t *marker, idx_t maxint, idx_t tag) { - int element, i, istop, istart, j, + idx_t element, i, istop, istart, j, jstop, jstart, link, nabor, node, npv, nqnbrs, nxnode, pvnode, rlmt, rloc, rnode, xqnbr; @@ -289,6 +291,7 @@ void mmdelm(int mdeg_node, idxtype *xadj, idxtype *adjncy, idxtype *head, idxtyp return; } + /*************************************************************************** * mmdint ---- mult minimum degree initialization * purpose -- this routine performs initialization for the @@ -302,32 +305,33 @@ void mmdelm(int mdeg_node, idxtype *xadj, idxtype *adjncy, idxtype *head, idxtyp * list -- linked list. * marker -- marker vector. ****************************************************************************/ -int mmdint(int neqns, idxtype *xadj, idxtype *adjncy, idxtype *head, idxtype *forward, - idxtype *backward, idxtype *qsize, idxtype *list, idxtype *marker) +idx_t mmdint(idx_t neqns, idx_t *xadj, idx_t *adjncy, idx_t *head, idx_t *forward, + idx_t *backward, idx_t *qsize, idx_t *list, idx_t *marker) { - int fnode, ndeg, node; - - for ( node = 1; node <= neqns; node++ ) { - head[node] = 0; - qsize[node] = 1; - marker[node] = 0; - list[node] = 0; - }; - - /* initialize the degree doubly linked lists. 
*/ - for ( node = 1; node <= neqns; node++ ) { - ndeg = xadj[node+1] - xadj[node]/* + 1*/; /* george */ - if (ndeg == 0) - ndeg = 1; - fnode = head[ndeg]; - forward[node] = fnode; - head[ndeg] = node; - if ( fnode > 0 ) backward[fnode] = node; - backward[node] = -ndeg; - }; - return 0; + idx_t fnode, ndeg, node; + + for (node=1; node<=neqns; node++) { + head[node] = 0; + qsize[node] = 1; + marker[node] = 0; + list[node] = 0; + }; + + /* initialize the degree doubly linked lists. */ + for (node=1; node<=neqns; node++) { + ndeg = xadj[node+1]-xadj[node]+1; + fnode = head[ndeg]; + forward[node] = fnode; + head[ndeg] = node; + if (fnode > 0) + backward[fnode] = node; + backward[node] = -ndeg; + }; + + return 0; } + /**************************************************************************** * mmdnum --- multi minimum degree numbering * purpose -- this routine performs the final step in producing @@ -345,9 +349,9 @@ int mmdint(int neqns, idxtype *xadj, idxtype *adjncy, idxtype *head, idxtype *f * output parameters -- * perm -- the permutation vector. ****************************************************************************/ -void mmdnum(int neqns, idxtype *perm, idxtype *invp, idxtype *qsize) +void mmdnum(idx_t neqns, idx_t *perm, idx_t *invp, idx_t *qsize) { - int father, nextf, node, nqsize, num, root; + idx_t father, nextf, node, nqsize, num, root; for ( node = 1; node <= neqns; node++ ) { nqsize = qsize[node]; @@ -391,6 +395,7 @@ void mmdnum(int neqns, idxtype *perm, idxtype *invp, idxtype *qsize) return; } + /**************************************************************************** * mmdupd ---- multiple minimum degree update * purpose -- this routine updates the degrees of nodes after a @@ -409,11 +414,11 @@ void mmdnum(int neqns, idxtype *perm, idxtype *invp, idxtype *qsize) * list -- marker vector for degree update. * *tag -- tag value. 
****************************************************************************/ -void mmdupd(int ehead, int neqns, idxtype *xadj, idxtype *adjncy, int delta, int *mdeg, - idxtype *head, idxtype *forward, idxtype *backward, idxtype *qsize, idxtype *list, - idxtype *marker, int maxint,int *tag) +void mmdupd(idx_t ehead, idx_t neqns, idx_t *xadj, idx_t *adjncy, idx_t delta, idx_t *mdeg, + idx_t *head, idx_t *forward, idx_t *backward, idx_t *qsize, idx_t *list, + idx_t *marker, idx_t maxint, idx_t *tag) { - int deg, deg0, element, enode, fnode, i, iq2, istop, + idx_t deg, deg0, element, enode, fnode, i, iq2, istop, istart, j, jstop, jstart, link, mdeg0, mtag, nabor, node, q2head, qxhead; diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mpmetis.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mpmetis.c deleted file mode 100644 index 08554536..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mpmetis.c +++ /dev/null @@ -1,402 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mpmetis.c - * - * This file contains the top level routines for the multilevel recursive - * bisection algorithm PMETIS. 
- * - * Started 7/24/97 - * George - * - * $Id: mpmetis.c,v 1.1 2003/07/16 15:55:12 karypis Exp $ - * - */ - -#include - - - -/************************************************************************* -* This function is the entry point for PWMETIS that accepts exact weights -* for the target partitions -**************************************************************************/ -void METIS_mCPartGraphRecursive(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, - idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, - int *options, int *edgecut, idxtype *part) -{ - int i, j; - GraphType graph; - CtrlType ctrl; - - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - - SetUpGraph(&graph, OP_PMETIS, *nvtxs, *ncon, xadj, adjncy, vwgt, adjwgt, *wgtflag); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = McPMETIS_CTYPE; - ctrl.IType = McPMETIS_ITYPE; - ctrl.RType = McPMETIS_RTYPE; - ctrl.dbglvl = McPMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - ctrl.optype = OP_PMETIS; - ctrl.CoarsenTo = 100; - - ctrl.nmaxvwgt = 1.5/(1.0*ctrl.CoarsenTo); - - InitRandom(-1); - - AllocateWorkSpace(&ctrl, &graph, *nparts); - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - *edgecut = MCMlevelRecursiveBisection(&ctrl, &graph, *nparts, part, 1.000, 0); - - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); - - FreeWorkSpace(&ctrl, &graph); - - if (*numflag == 1) - Change2FNumbering(*nvtxs, xadj, adjncy, part); -} - - - -/************************************************************************* -* This function is the entry point for PWMETIS that accepts exact weights -* for the target partitions -**************************************************************************/ 
-void METIS_mCHPartGraphRecursive(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, - idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, - float *ubvec, int *options, int *edgecut, idxtype *part) -{ - int i, j; - GraphType graph; - CtrlType ctrl; - float *myubvec; - - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - - SetUpGraph(&graph, OP_PMETIS, *nvtxs, *ncon, xadj, adjncy, vwgt, adjwgt, *wgtflag); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = PMETIS_CTYPE; - ctrl.IType = PMETIS_ITYPE; - ctrl.RType = PMETIS_RTYPE; - ctrl.dbglvl = PMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - ctrl.optype = OP_PMETIS; - ctrl.CoarsenTo = 100; - - ctrl.nmaxvwgt = 1.5/(1.0*ctrl.CoarsenTo); - - myubvec = fmalloc(*ncon, "PWMETIS: mytpwgts"); - scopy(*ncon, ubvec, myubvec); - - InitRandom(-1); - - AllocateWorkSpace(&ctrl, &graph, *nparts); - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - *edgecut = MCHMlevelRecursiveBisection(&ctrl, &graph, *nparts, part, myubvec, 0); - - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); - - FreeWorkSpace(&ctrl, &graph); - GKfree((void **)(&myubvec), LTERM); - - if (*numflag == 1) - Change2FNumbering(*nvtxs, xadj, adjncy, part); -} - - - -/************************************************************************* -* This function is the entry point for PWMETIS that accepts exact weights -* for the target partitions -**************************************************************************/ -void METIS_mCPartGraphRecursiveInternal(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, - float *nvwgt, idxtype *adjwgt, int *nparts, int *options, int *edgecut, idxtype *part) -{ - int i, j; - GraphType graph; - CtrlType ctrl; - 
- SetUpGraph2(&graph, *nvtxs, *ncon, xadj, adjncy, nvwgt, adjwgt); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = PMETIS_CTYPE; - ctrl.IType = PMETIS_ITYPE; - ctrl.RType = PMETIS_RTYPE; - ctrl.dbglvl = PMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - ctrl.optype = OP_PMETIS; - ctrl.CoarsenTo = 100; - - ctrl.nmaxvwgt = 1.5/(1.0*ctrl.CoarsenTo); - - InitRandom(-1); - - AllocateWorkSpace(&ctrl, &graph, *nparts); - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - *edgecut = MCMlevelRecursiveBisection(&ctrl, &graph, *nparts, part, 1.000, 0); - - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); - - FreeWorkSpace(&ctrl, &graph); - -} - - -/************************************************************************* -* This function is the entry point for PWMETIS that accepts exact weights -* for the target partitions -**************************************************************************/ -void METIS_mCHPartGraphRecursiveInternal(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, - float *nvwgt, idxtype *adjwgt, int *nparts, float *ubvec, int *options, int *edgecut, - idxtype *part) -{ - int i, j; - GraphType graph; - CtrlType ctrl; - float *myubvec; - - SetUpGraph2(&graph, *nvtxs, *ncon, xadj, adjncy, nvwgt, adjwgt); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = PMETIS_CTYPE; - ctrl.IType = PMETIS_ITYPE; - ctrl.RType = PMETIS_RTYPE; - ctrl.dbglvl = PMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - ctrl.optype = OP_PMETIS; - ctrl.CoarsenTo = 100; - - ctrl.nmaxvwgt = 1.5/(1.0*ctrl.CoarsenTo); - - myubvec = 
fmalloc(*ncon, "PWMETIS: mytpwgts"); - scopy(*ncon, ubvec, myubvec); - - InitRandom(-1); - - AllocateWorkSpace(&ctrl, &graph, *nparts); - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - *edgecut = MCHMlevelRecursiveBisection(&ctrl, &graph, *nparts, part, myubvec, 0); - - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); - - FreeWorkSpace(&ctrl, &graph); - GKfree((void **)&myubvec, LTERM); - -} - - - - -/************************************************************************* -* This function takes a graph and produces a bisection of it -**************************************************************************/ -int MCMlevelRecursiveBisection(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, - float ubfactor, int fpart) -{ - int i, j, nvtxs, ncon, cut; - idxtype *label, *where; - GraphType lgraph, rgraph; - float tpwgts[2]; - - nvtxs = graph->nvtxs; - if (nvtxs == 0) { - printf("\t***Cannot bisect a graph with 0 vertices!\n\t***You are trying to partition a graph into too many parts!\n"); - return 0; - } - - /* Determine the weights of the partitions */ - tpwgts[0] = 1.0*(nparts>>1)/(1.0*nparts); - tpwgts[1] = 1.0 - tpwgts[0]; - - MCMlevelEdgeBisection(ctrl, graph, tpwgts, ubfactor); - cut = graph->mincut; - - label = graph->label; - where = graph->where; - for (i=0; i 2) - SplitGraphPart(ctrl, graph, &lgraph, &rgraph); - - /* Free the memory of the top level graph */ - GKfree((void **)&graph->gdata, &graph->nvwgt, &graph->rdata, &graph->label, LTERM); - - - /* Do the recursive call */ - if (nparts > 3) { - cut += MCMlevelRecursiveBisection(ctrl, &lgraph, nparts/2, part, ubfactor, fpart); - cut += MCMlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, ubfactor, fpart+nparts/2); - } - else if (nparts == 3) { - cut += MCMlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, ubfactor, fpart+nparts/2); - GKfree((void 
**)&lgraph.gdata, &lgraph.nvwgt, &lgraph.label, LTERM); - } - - return cut; - -} - - - -/************************************************************************* -* This function takes a graph and produces a bisection of it -**************************************************************************/ -int MCHMlevelRecursiveBisection(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, - float *ubvec, int fpart) -{ - int i, j, nvtxs, ncon, cut; - idxtype *label, *where; - GraphType lgraph, rgraph; - float tpwgts[2], *npwgts, *lubvec, *rubvec; - - lubvec = rubvec = NULL; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - if (nvtxs == 0) { - printf("\t***Cannot bisect a graph with 0 vertices!\n\t***You are trying to partition a graph into too many parts!\n"); - return 0; - } - - /* Determine the weights of the partitions */ - tpwgts[0] = 1.0*(nparts>>1)/(1.0*nparts); - tpwgts[1] = 1.0 - tpwgts[0]; - - /* For now, relax at the coarsest level only */ - if (nparts == 2) - MCHMlevelEdgeBisection(ctrl, graph, tpwgts, ubvec); - else - MCMlevelEdgeBisection(ctrl, graph, tpwgts, 1.000); - cut = graph->mincut; - - label = graph->label; - where = graph->where; - for (i=0; i 2) { - /* Adjust the ubvecs before the split */ - npwgts = graph->npwgts; - lubvec = fmalloc(ncon, "MCHMlevelRecursiveBisection"); - rubvec = fmalloc(ncon, "MCHMlevelRecursiveBisection"); - - for (i=0; igdata, &graph->nvwgt, &graph->rdata, &graph->label, LTERM); - - - /* Do the recursive call */ - if (nparts > 3) { - cut += MCHMlevelRecursiveBisection(ctrl, &lgraph, nparts/2, part, lubvec, fpart); - cut += MCHMlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, rubvec, fpart+nparts/2); - } - else if (nparts == 3) { - cut += MCHMlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, rubvec, fpart+nparts/2); - GKfree((void **)&lgraph.gdata, &lgraph.nvwgt, &lgraph.label, LTERM); - } - - GKfree((void **)&lubvec, &rubvec, LTERM); - - return cut; - -} - - - - 
-/************************************************************************* -* This function performs multilevel bisection -**************************************************************************/ -void MCMlevelEdgeBisection(CtrlType *ctrl, GraphType *graph, float *tpwgts, float ubfactor) -{ - GraphType *cgraph; - - cgraph = MCCoarsen2Way(ctrl, graph); - - MocInit2WayPartition(ctrl, cgraph, tpwgts, ubfactor); - - MocRefine2Way(ctrl, graph, cgraph, tpwgts, ubfactor); - -} - - - -/************************************************************************* -* This function performs multilevel bisection -**************************************************************************/ -void MCHMlevelEdgeBisection(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec) -{ - int i; - GraphType *cgraph; - -/* - for (i=0; incon; i++) - printf("%.4f ", ubvec[i]); - printf("\n"); -*/ - - cgraph = MCCoarsen2Way(ctrl, graph); - - MocInit2WayPartition2(ctrl, cgraph, tpwgts, ubvec); - - MocRefine2Way2(ctrl, graph, cgraph, tpwgts, ubvec); - -} - - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mrefine.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mrefine.c deleted file mode 100644 index 3e28dc75..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mrefine.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * refine.c - * - * This file contains the driving routines for multilevel refinement - * - * Started 7/24/97 - * George - * - * $Id: mrefine.c,v 1.1 2003/07/24 18:39:10 karypis Exp $ - */ - -#include - - -/************************************************************************* -* This function is the entry point of refinement -**************************************************************************/ -void MocRefine2Way(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, float *tpwgts, float ubfactor) -{ - int i; - float tubvec[MAXNCON]; - - for (i=0; 
incon; i++) - tubvec[i] = 1.0; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); - - /* Compute the parameters of the coarsest graph */ - MocCompute2WayPartitionParams(ctrl, graph); - - for (;;) { - ASSERT(CheckBnd(graph)); - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RefTmr)); - switch (ctrl->RType) { - case RTYPE_FM: - MocBalance2Way(ctrl, graph, tpwgts, 1.03); - MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 8); - break; - case 2: - MocBalance2Way(ctrl, graph, tpwgts, 1.03); - MocFM_2WayEdgeRefine2(ctrl, graph, tpwgts, tubvec, 8); - break; - default: - errexit("Unknown refinement type: %d\n", ctrl->RType); - } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->RefTmr)); - - if (graph == orggraph) - break; - - graph = graph->finer; - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); - MocProject2WayPartition(ctrl, graph); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr)); - } - - MocBalance2Way(ctrl, graph, tpwgts, 1.01); - MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 8); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); -} - - -/************************************************************************* -* This function allocates memory for 2-way edge refinement -**************************************************************************/ -void MocAllocate2WayPartitionMemory(CtrlType *ctrl, GraphType *graph) -{ - int nvtxs, ncon; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - - graph->rdata = idxmalloc(5*nvtxs, "Allocate2WayPartitionMemory: rdata"); - graph->where = graph->rdata; - graph->id = graph->rdata + nvtxs; - graph->ed = graph->rdata + 2*nvtxs; - graph->bndptr = graph->rdata + 3*nvtxs; - graph->bndind = graph->rdata + 4*nvtxs; - - graph->npwgts = fmalloc(2*ncon, "npwgts"); -} - - -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void 
MocCompute2WayPartitionParams(CtrlType *ctrl, GraphType *graph) -{ - int i, j, k, l, nvtxs, ncon, nbnd, mincut; - idxtype *xadj, *adjncy, *adjwgt; - float *nvwgt, *npwgts; - idxtype *id, *ed, *where; - idxtype *bndptr, *bndind; - int me, other; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - where = graph->where; - npwgts = sset(2*ncon, 0.0, graph->npwgts); - id = idxset(nvtxs, 0, graph->id); - ed = idxset(nvtxs, 0, graph->ed); - bndptr = idxset(nvtxs, -1, graph->bndptr); - bndind = graph->bndind; - - - /*------------------------------------------------------------ - / Compute now the id/ed degrees - /------------------------------------------------------------*/ - nbnd = mincut = 0; - for (i=0; i= 0 && where[i] <= 1); - me = where[i]; - saxpy(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1); - - for (j=xadj[i]; j 0 || xadj[i] == xadj[i+1]) { - mincut += ed[i]; - bndptr[i] = nbnd; - bndind[nbnd++] = i; - } - } - - graph->mincut = mincut/2; - graph->nbnd = nbnd; - -} - - - -/************************************************************************* -* This function projects a partition, and at the same time computes the -* parameters for refinement. 
-**************************************************************************/ -void MocProject2WayPartition(CtrlType *ctrl, GraphType *graph) -{ - int i, j, k, nvtxs, nbnd, me; - idxtype *xadj, *adjncy, *adjwgt, *adjwgtsum; - idxtype *cmap, *where, *id, *ed, *bndptr, *bndind; - idxtype *cwhere, *cid, *ced, *cbndptr; - GraphType *cgraph; - - cgraph = graph->coarser; - cwhere = cgraph->where; - cid = cgraph->id; - ced = cgraph->ed; - cbndptr = cgraph->bndptr; - - nvtxs = graph->nvtxs; - cmap = graph->cmap; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; - - MocAllocate2WayPartitionMemory(ctrl, graph); - - where = graph->where; - id = idxset(nvtxs, 0, graph->id); - ed = idxset(nvtxs, 0, graph->ed); - bndptr = idxset(nvtxs, -1, graph->bndptr); - bndind = graph->bndind; - - - /* Go through and project partition and compute id/ed for the nodes */ - for (i=0; i 0 || xadj[i] == xadj[i+1]) { - bndptr[i] = nbnd; - bndind[nbnd++] = i; - } - } - } - } - - graph->mincut = cgraph->mincut; - graph->nbnd = nbnd; - scopy(2*graph->ncon, cgraph->npwgts, graph->npwgts); - - FreeGraph(graph->coarser); - graph->coarser = NULL; - -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mrefine2.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mrefine2.c deleted file mode 100644 index 91ad0b58..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mrefine2.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mrefine2.c - * - * This file contains the driving routines for multilevel refinement - * - * Started 7/24/97 - * George - * - * $Id: mrefine2.c,v 1.1 2003/07/16 15:55:12 karypis Exp $ - */ - -#include - - -/************************************************************************* -* This function is the entry point of refinement -**************************************************************************/ -void 
MocRefine2Way2(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, float *tpwgts, - float *ubvec) -{ - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); - - /* Compute the parameters of the coarsest graph */ - MocCompute2WayPartitionParams(ctrl, graph); - - for (;;) { - ASSERT(CheckBnd(graph)); - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RefTmr)); - switch (ctrl->RType) { - case RTYPE_FM: - MocBalance2Way2(ctrl, graph, tpwgts, ubvec); - MocFM_2WayEdgeRefine2(ctrl, graph, tpwgts, ubvec, 8); - break; - default: - errexit("Unknown refinement type: %d\n", ctrl->RType); - } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->RefTmr)); - - if (graph == orggraph) - break; - - graph = graph->finer; - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); - MocProject2WayPartition(ctrl, graph); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr)); - } - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); -} - - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mutil.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mutil.c deleted file mode 100644 index 68dc5c5e..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/mutil.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * mutil.c - * - * This file contains various utility functions for the MOC portion of the - * code - * - * Started 2/15/98 - * George - * - * $Id: mutil.c,v 1.1 2003/07/16 15:55:13 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function checks if the vertex weights of two vertices are below -* a given set of values -**************************************************************************/ -int AreAllVwgtsBelow(int ncon, float alpha, float *vwgt1, float beta, float *vwgt2, float limit) -{ - int i; - - for (i=0; i limit) - return 0; - - return 1; -} - - -/************************************************************************* -* This 
function checks if the vertex weights of two vertices are below -* a given set of values -**************************************************************************/ -int AreAnyVwgtsBelow(int ncon, float alpha, float *vwgt1, float beta, float *vwgt2, float limit) -{ - int i; - - for (i=0; i max) - max = npwgts[j*ncon+i]; - } - if (max*nparts > lb) - lb = max*nparts; - } - - return lb; -} - -/************************************************************************* -* This function checks if the vertex weights of two vertices are below -* a given set of values -**************************************************************************/ -int AreAllBelow(int ncon, float *v1, float *v2) -{ - int i; - - for (i=0; i v2[i]) - return 0; - - return 1; -} diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/myqsort.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/myqsort.c deleted file mode 100644 index a6939cef..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/myqsort.c +++ /dev/null @@ -1,547 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * myqsort.c - * - * This file contains a fast idxtype increasing qsort algorithm. 
- * Addopted from TeX - * - * Started 10/18/96 - * George - * - * $Id: myqsort.c,v 1.1 2003/07/16 15:55:13 karypis Exp $ - */ - -#include /* only for type declarations */ - -#define THRESH 1 /* threshold for insertion */ -#define MTHRESH 6 /* threshold for median */ - - - - -static void siqst(idxtype *, idxtype *); -static void iiqst(int *, int *); -static void keyiqst(KeyValueType *, KeyValueType *); -static void keyvaliqst(KeyValueType *, KeyValueType *); - - -/************************************************************************* -* Entry point of idxtype increasing sort -**************************************************************************/ -void iidxsort(int n, idxtype *base) -{ - register idxtype *i; - register idxtype *j; - register idxtype *lo; - register idxtype *hi; - register idxtype *min; - register idxtype c; - idxtype *max; - - if (n <= 1) - return; - - max = base + n; - - if (n >= THRESH) { - siqst(base, max); - hi = base + THRESH; - } - else - hi = max; - - for (j = lo = base; lo++ < hi;) { - if (*j > *lo) - j = lo; - } - if (j != base) { /* swap j into place */ - c = *base; - *base = *j; - *j = c; - } - - for (min = base; (hi = min += 1) < max;) { - while (*(--hi) > *min); - if ((hi += 1) != min) { - for (lo = min + 1; --lo >= min;) { - c = *lo; - for (i = j = lo; (j -= 1) >= hi; i = j) - *i = *j; - *i = c; - } - } - } -} - -static void siqst(idxtype *base, idxtype *max) -{ - register idxtype *i; - register idxtype *j; - register idxtype *jj; - register idxtype *mid; - register int ii; - register idxtype c; - idxtype *tmp; - int lo; - int hi; - - lo = max - base; /* number of elements as idxtype */ - do { - mid = base + ((unsigned) lo>>1); - if (lo >= MTHRESH) { - j = (*base > *mid ? base : mid); - tmp = max - 1; - if (*j > *tmp) { - j = (j == base ? 
mid : base); /* switch to first loser */ - if (*j < *tmp) - j = tmp; - } - - if (j != mid) { /* SWAP */ - c = *mid; - *mid = *j; - *j = c; - } - } - - /* Semi-standard quicksort partitioning/swapping */ - for (i = base, j = max - 1;;) { - while (i < mid && *i <= *mid) - i++; - while (j > mid) { - if (*mid <= *j) { - j--; - continue; - } - tmp = i + 1; /* value of i after swap */ - if (i == mid) /* j <-> mid, new mid is j */ - mid = jj = j; - else /* i <-> j */ - jj = j--; - goto swap; - } - - if (i == mid) - break; - else { /* i <-> mid, new mid is i */ - jj = mid; - tmp = mid = i; /* value of i after swap */ - j--; - } -swap: - c = *i; - *i = *jj; - *jj = c; - i = tmp; - } - - i = (j = mid) + 1; - if ((lo = j - base) <= (hi = max - i)) { - if (lo >= THRESH) - siqst(base, j); - base = i; - lo = hi; - } - else { - if (hi >= THRESH) - siqst(i, max); - max = j; - } - } while (lo >= THRESH); -} - - - - - -/************************************************************************* -* Entry point of int increasing sort -**************************************************************************/ -void iintsort(int n, int *base) -{ - register int *i; - register int *j; - register int *lo; - register int *hi; - register int *min; - register int c; - int *max; - - if (n <= 1) - return; - - max = base + n; - - if (n >= THRESH) { - iiqst(base, max); - hi = base + THRESH; - } - else - hi = max; - - for (j = lo = base; lo++ < hi;) { - if (*j > *lo) - j = lo; - } - if (j != base) { /* swap j into place */ - c = *base; - *base = *j; - *j = c; - } - - for (min = base; (hi = min += 1) < max;) { - while (*(--hi) > *min); - if ((hi += 1) != min) { - for (lo = min + 1; --lo >= min;) { - c = *lo; - for (i = j = lo; (j -= 1) >= hi; i = j) - *i = *j; - *i = c; - } - } - } -} - - -static void iiqst(int *base, int *max) -{ - register int *i; - register int *j; - register int *jj; - register int *mid; - register int ii; - register int c; - int *tmp; - int lo; - int hi; - - lo = max - base; /* 
number of elements as ints */ - do { - mid = base + ((unsigned) lo>>1); - if (lo >= MTHRESH) { - j = (*base > *mid ? base : mid); - tmp = max - 1; - if (*j > *tmp) { - j = (j == base ? mid : base); /* switch to first loser */ - if (*j < *tmp) - j = tmp; - } - - if (j != mid) { /* SWAP */ - c = *mid; - *mid = *j; - *j = c; - } - } - - /* Semi-standard quicksort partitioning/swapping */ - for (i = base, j = max - 1;;) { - while (i < mid && *i <= *mid) - i++; - while (j > mid) { - if (*mid <= *j) { - j--; - continue; - } - tmp = i + 1; /* value of i after swap */ - if (i == mid) /* j <-> mid, new mid is j */ - mid = jj = j; - else /* i <-> j */ - jj = j--; - goto swap; - } - - if (i == mid) - break; - else { /* i <-> mid, new mid is i */ - jj = mid; - tmp = mid = i; /* value of i after swap */ - j--; - } -swap: - c = *i; - *i = *jj; - *jj = c; - i = tmp; - } - - i = (j = mid) + 1; - if ((lo = j - base) <= (hi = max - i)) { - if (lo >= THRESH) - iiqst(base, j); - base = i; - lo = hi; - } - else { - if (hi >= THRESH) - iiqst(i, max); - max = j; - } - } while (lo >= THRESH); -} - - - - - -/************************************************************************* -* Entry point of KeyVal increasing sort, ONLY key part -**************************************************************************/ -void ikeysort(int n, KeyValueType *base) -{ - register KeyValueType *i; - register KeyValueType *j; - register KeyValueType *lo; - register KeyValueType *hi; - register KeyValueType *min; - register KeyValueType c; - KeyValueType *max; - - if (n <= 1) - return; - - max = base + n; - - if (n >= THRESH) { - keyiqst(base, max); - hi = base + THRESH; - } - else - hi = max; - - for (j = lo = base; lo++ < hi;) { - if (j->key > lo->key) - j = lo; - } - if (j != base) { /* swap j into place */ - c = *base; - *base = *j; - *j = c; - } - - for (min = base; (hi = min += 1) < max;) { - while ((--hi)->key > min->key); - if ((hi += 1) != min) { - for (lo = min + 1; --lo >= min;) { - c = *lo; - 
for (i = j = lo; (j -= 1) >= hi; i = j) - *i = *j; - *i = c; - } - } - } - - /* Sanity check */ - { - int i; - for (i=0; i base[i+1].key) - printf("Something went wrong!\n"); - } -} - - -static void keyiqst(KeyValueType *base, KeyValueType *max) -{ - register KeyValueType *i; - register KeyValueType *j; - register KeyValueType *jj; - register KeyValueType *mid; - register KeyValueType c; - KeyValueType *tmp; - int lo; - int hi; - - lo = (max - base)>>1; /* number of elements as KeyValueType */ - do { - mid = base + ((unsigned) lo>>1); - if (lo >= MTHRESH) { - j = (base->key > mid->key ? base : mid); - tmp = max - 1; - if (j->key > tmp->key) { - j = (j == base ? mid : base); /* switch to first loser */ - if (j->key < tmp->key) - j = tmp; - } - - if (j != mid) { /* SWAP */ - c = *mid; - *mid = *j; - *j = c; - } - } - - /* Semi-standard quicksort partitioning/swapping */ - for (i = base, j = max - 1;;) { - while (i < mid && i->key <= mid->key) - i++; - while (j > mid) { - if (mid->key <= j->key) { - j--; - continue; - } - tmp = i + 1; /* value of i after swap */ - if (i == mid) /* j <-> mid, new mid is j */ - mid = jj = j; - else /* i <-> j */ - jj = j--; - goto swap; - } - - if (i == mid) - break; - else { /* i <-> mid, new mid is i */ - jj = mid; - tmp = mid = i; /* value of i after swap */ - j--; - } -swap: - c = *i; - *i = *jj; - *jj = c; - i = tmp; - } - - i = (j = mid) + 1; - if ((lo = (j - base)>>1) <= (hi = (max - i)>>1)) { - if (lo >= THRESH) - keyiqst(base, j); - base = i; - lo = hi; - } - else { - if (hi >= THRESH) - keyiqst(i, max); - max = j; - } - } while (lo >= THRESH); -} - - - - -/************************************************************************* -* Entry point of KeyVal increasing sort, BOTH key and val part -**************************************************************************/ -void ikeyvalsort(int n, KeyValueType *base) -{ - register KeyValueType *i; - register KeyValueType *j; - register KeyValueType *lo; - register KeyValueType *hi; 
- register KeyValueType *min; - register KeyValueType c; - KeyValueType *max; - - if (n <= 1) - return; - - max = base + n; - - if (n >= THRESH) { - keyvaliqst(base, max); - hi = base + THRESH; - } - else - hi = max; - - for (j = lo = base; lo++ < hi;) { - if ((j->key > lo->key) || (j->key == lo->key && j->val > lo->val)) - j = lo; - } - if (j != base) { /* swap j into place */ - c = *base; - *base = *j; - *j = c; - } - - for (min = base; (hi = min += 1) < max;) { - while ((--hi)->key > min->key || (hi->key == min->key && hi->val > min->val)); - if ((hi += 1) != min) { - for (lo = min + 1; --lo >= min;) { - c = *lo; - for (i = j = lo; (j -= 1) >= hi; i = j) - *i = *j; - *i = c; - } - } - } -} - - -static void keyvaliqst(KeyValueType *base, KeyValueType *max) -{ - register KeyValueType *i; - register KeyValueType *j; - register KeyValueType *jj; - register KeyValueType *mid; - register KeyValueType c; - KeyValueType *tmp; - int lo; - int hi; - - lo = (max - base)>>1; /* number of elements as KeyValueType */ - do { - mid = base + ((unsigned) lo>>1); - if (lo >= MTHRESH) { - j = (base->key > mid->key || (base->key == mid->key && base->val > mid->val) ? base : mid); - tmp = max - 1; - if (j->key > tmp->key || (j->key == tmp->key && j->val > tmp->val)) { - j = (j == base ? 
mid : base); /* switch to first loser */ - if (j->key < tmp->key || (j->key == tmp->key && j->val < tmp->val)) - j = tmp; - } - - if (j != mid) { /* SWAP */ - c = *mid; - *mid = *j; - *j = c; - } - } - - /* Semi-standard quicksort partitioning/swapping */ - for (i = base, j = max - 1;;) { - while (i < mid && (i->key < mid->key || (i->key == mid->key && i->val <= mid->val))) - i++; - while (j > mid) { - if (mid->key < j->key || (mid->key == j->key && mid->val <= j->val)) { - j--; - continue; - } - tmp = i + 1; /* value of i after swap */ - if (i == mid) /* j <-> mid, new mid is j */ - mid = jj = j; - else /* i <-> j */ - jj = j--; - goto swap; - } - - if (i == mid) - break; - else { /* i <-> mid, new mid is i */ - jj = mid; - tmp = mid = i; /* value of i after swap */ - j--; - } -swap: - c = *i; - *i = *jj; - *jj = c; - i = tmp; - } - - i = (j = mid) + 1; - if ((lo = (j - base)>>1) <= (hi = (max - i)>>1)) { - if (lo >= THRESH) - keyvaliqst(base, j); - base = i; - lo = hi; - } - else { - if (hi >= THRESH) - keyvaliqst(i, max); - max = j; - } - } while (lo >= THRESH); -} diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/ometis.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/ometis.c index f617d4c3..51e39754 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/ometis.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/ometis.c @@ -9,551 +9,463 @@ * Started 7/24/97 * George * - * $Id: ometis.c,v 1.2 2003/07/31 06:14:01 karypis Exp $ + * $Id: ometis.c 10513 2011-07-07 22:06:03Z karypis $ * */ -#include - - -/************************************************************************* -* This function is the entry point for OEMETIS -**************************************************************************/ -void METIS_EdgeND(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, - idxtype *perm, idxtype *iperm) +#include "metislib.h" + + 
+/*************************************************************************/ +/*! This function is the entry point for the multilevel nested dissection + ordering code. At each bisection, a node-separator is computed using + a node-based refinement approach. + + \param nvtxs is the number of vertices in the graph. + \param xadj is of length nvtxs+1 marking the start of the adjancy + list of each vertex in adjncy. + \param adjncy stores the adjacency lists of the vertices. The adjnacy + list of a vertex should not contain the vertex itself. + \param vwgt is an array of size nvtxs storing the weight of each + vertex. If vwgt is NULL, then the vertices are considered + to have unit weight. + \param numflag is either 0 or 1 indicating that the numbering of + the vertices starts from 0 or 1, respectively. + \param options is an array of size METIS_NOPTIONS used to pass + various options impacting the of the algorithm. A NULL + value indicates use of default options. + \param perm is an array of size nvtxs such that if A and A' are + the original and permuted matrices, then A'[i] = A[perm[i]]. + \param iperm is an array of size nvtxs such that if A and A' are + the original and permuted matrices, then A[i] = A'[iperm[i]]. 
+*/ +/*************************************************************************/ +int METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *options, idx_t *perm, idx_t *iperm) { - int i, j; - GraphType graph; - CtrlType ctrl; - - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - - SetUpGraph(&graph, OP_OEMETIS, *nvtxs, 1, xadj, adjncy, NULL, NULL, 0); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = OEMETIS_CTYPE; - ctrl.IType = OEMETIS_ITYPE; - ctrl.RType = OEMETIS_RTYPE; - ctrl.dbglvl = OEMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - ctrl.oflags = 0; - ctrl.pfactor = -1; - ctrl.nseps = 1; - - ctrl.optype = OP_OEMETIS; - ctrl.CoarsenTo = 20; - ctrl.maxvwgt = 1.5*(idxsum(*nvtxs, graph.vwgt)/ctrl.CoarsenTo); - - InitRandom(-1); + int sigrval=0, renumber=0; + idx_t i, ii, j, l, nnvtxs=0; + graph_t *graph=NULL; + ctrl_t *ctrl; + idx_t *cptr, *cind, *piperm; + int numflag = 0; - AllocateWorkSpace(&ctrl, &graph, 2); + /* set up malloc cleaning code and signal catchers */ + if (!gk_malloc_init()) + return METIS_ERROR_MEMORY; - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); + gk_sigtrap(); - MlevelNestedDissection(&ctrl, &graph, iperm, ORDER_UNBALANCE_FRACTION, *nvtxs); + if ((sigrval = gk_sigcatch()) != 0) + goto SIGTHROW; - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); - for (i=0; i<*nvtxs; i++) - perm[iperm[i]] = i; - - FreeWorkSpace(&ctrl, &graph); - - if (*numflag == 1) - Change2FNumberingOrder(*nvtxs, xadj, adjncy, perm, iperm); -} - - -/************************************************************************* -* This function is the entry point for ONCMETIS -**************************************************************************/ -void 
METIS_NodeND(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, - idxtype *perm, idxtype *iperm) -{ - int i, ii, j, l, wflag, nflag; - GraphType graph; - CtrlType ctrl; - idxtype *cptr, *cind, *piperm; + /* set up the run time parameters */ + ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL); + if (!ctrl) { + gk_siguntrap(); + return METIS_ERROR_INPUT; + } - if (*numflag == 1) + /* if required, change the numbering to 0 */ + if (ctrl->numflag == 1) { Change2CNumbering(*nvtxs, xadj, adjncy); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = ONMETIS_CTYPE; - ctrl.IType = ONMETIS_ITYPE; - ctrl.RType = ONMETIS_RTYPE; - ctrl.dbglvl = ONMETIS_DBGLVL; - ctrl.oflags = ONMETIS_OFLAGS; - ctrl.pfactor = ONMETIS_PFACTOR; - ctrl.nseps = ONMETIS_NSEPS; + renumber = 1; } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - ctrl.oflags = options[OPTION_OFLAGS]; - ctrl.pfactor = options[OPTION_PFACTOR]; - ctrl.nseps = options[OPTION_NSEPS]; - } - if (ctrl.nseps < 1) - ctrl.nseps = 1; - - ctrl.optype = OP_ONMETIS; - ctrl.CoarsenTo = 100; - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - InitRandom(-1); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr)); - if (ctrl.pfactor > 0) { - /*============================================================ - * Prune the dense columns - ==============================================================*/ - piperm = idxmalloc(*nvtxs, "ONMETIS: piperm"); + /* prune the dense columns */ + if (ctrl->pfactor > 0.0) { + piperm = imalloc(*nvtxs, "OMETIS: piperm"); - PruneGraph(&ctrl, &graph, *nvtxs, xadj, adjncy, piperm, (float)(0.1*ctrl.pfactor)); - } - else if (ctrl.oflags&OFLAG_COMPRESS) { - 
/*============================================================ - * Compress the graph - ==============================================================*/ - cptr = idxmalloc(*nvtxs+1, "ONMETIS: cptr"); - cind = idxmalloc(*nvtxs, "ONMETIS: cind"); - - CompressGraph(&ctrl, &graph, *nvtxs, xadj, adjncy, cptr, cind); - - if (graph.nvtxs >= COMPRESSION_FRACTION*(*nvtxs)) { - ctrl.oflags--; /* We actually performed no compression */ - GKfree((void **)&cptr, &cind, LTERM); + graph = PruneGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, piperm, ctrl->pfactor); + if (graph == NULL) { + /* if there was no prunning, cleanup the pfactor */ + gk_free((void **)&piperm, LTERM); + ctrl->pfactor = 0.0; + } + else { + nnvtxs = graph->nvtxs; + ctrl->compress = 0; /* disable compression if prunning took place */ } - else if (2*graph.nvtxs < *nvtxs && ctrl.nseps == 1) - ctrl.nseps = 2; - } - else { - SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, NULL, NULL, 0); } - - /*============================================================= - * Do the nested dissection ordering - --=============================================================*/ - ctrl.maxvwgt = 1.5*(idxsum(graph.nvtxs, graph.vwgt)/ctrl.CoarsenTo); - AllocateWorkSpace(&ctrl, &graph, 2); - - if (ctrl.oflags&OFLAG_CCMP) - MlevelNestedDissectionCC(&ctrl, &graph, iperm, ORDER_UNBALANCE_FRACTION, graph.nvtxs); - else - MlevelNestedDissection(&ctrl, &graph, iperm, ORDER_UNBALANCE_FRACTION, graph.nvtxs); - - FreeWorkSpace(&ctrl, &graph); - - if (ctrl.pfactor > 0) { /* Order any prunned vertices */ - if (graph.nvtxs < *nvtxs) { - idxcopy(graph.nvtxs, iperm, perm); /* Use perm as an auxiliary array */ - for (i=0; icompress) { + cptr = imalloc(*nvtxs+1, "OMETIS: cptr"); + cind = imalloc(*nvtxs, "OMETIS: cind"); + + graph = CompressGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, cptr, cind); + if (graph == NULL) { + /* if there was no compression, cleanup the compress flag */ + gk_free((void **)&cptr, &cind, LTERM); + ctrl->compress = 0; } - - 
GKfree((void **)&piperm, LTERM); - } - else if (ctrl.oflags&OFLAG_COMPRESS) { /* Uncompress the ordering */ - if (graph.nvtxs < COMPRESSION_FRACTION*(*nvtxs)) { - /* construct perm from iperm */ - for (i=0; invtxs; + ctrl->cfactor = 1.0*(*nvtxs)/nnvtxs; + if (ctrl->cfactor > 1.5 && ctrl->nseps == 1) + ctrl->nseps = 2; + //ctrl->nseps = (idx_t)(ctrl->cfactor*ctrl->nseps); } - - GKfree((void **)&cptr, &cind, LTERM); } + /* if no prunning and no compression, setup the graph in the normal way. */ + if (ctrl->pfactor == 0.0 && ctrl->compress == 0) + graph = SetupGraph(ctrl, *nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL); - for (i=0; i<*nvtxs; i++) - perm[iperm[i]] = i; - - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); - - if (*numflag == 1) - Change2FNumberingOrder(*nvtxs, xadj, adjncy, perm, iperm); - -} + ASSERT(CheckGraph(graph, ctrl->numflag, 1)); + /* allocate workspace memory */ + AllocateWorkSpace(ctrl, graph); -/************************************************************************* -* This function is the entry point for ONWMETIS. It requires weights on the -* vertices. It is for the case that the matrix has been pre-compressed. 
-**************************************************************************/ -void METIS_NodeWND(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, int *numflag, - int *options, idxtype *perm, idxtype *iperm) -{ - int i, j, tvwgt; - GraphType graph; - CtrlType ctrl; + /* do the nested dissection ordering */ + if (ctrl->ccorder) + MlevelNestedDissectionCC(ctrl, graph, iperm, graph->nvtxs); + else + MlevelNestedDissection(ctrl, graph, iperm, graph->nvtxs); - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, NULL, 2); + if (ctrl->pfactor > 0.0) { /* Order any prunned vertices */ + icopy(nnvtxs, iperm, perm); /* Use perm as an auxiliary array */ + for (i=0; icompress) { /* Uncompress the ordering */ + /* construct perm from iperm */ + for (i=0; idbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl)); + + /* clean up */ + FreeCtrl(&ctrl); - if (*numflag == 1) +SIGTHROW: + /* if required, change the numbering back to 1 */ + if (renumber) Change2FNumberingOrder(*nvtxs, xadj, adjncy, perm, iperm); -} + gk_siguntrap(); + gk_malloc_cleanup(0); + return metis_rcode(sigrval); +} -/************************************************************************* -* This function takes a graph and produces a bisection of it -**************************************************************************/ -void MlevelNestedDissection(CtrlType *ctrl, GraphType *graph, idxtype *order, float ubfactor, int lastvtx) +/*************************************************************************/ +/*! This is the driver for the recursive tri-section of a graph into the + left, separator, and right partitions. The graphs correspond to the + left and right parts are further tri-sected in a recursive fashion. + The nodes in the separator are ordered at the end of the left & right + nodes. 
+ */ +/*************************************************************************/ +void MlevelNestedDissection(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx) { - int i, j, nvtxs, nbnd, tvwgt, tpwgts2[2]; - idxtype *label, *bndind; - GraphType lgraph, rgraph; + idx_t i, j, nvtxs, nbnd; + idx_t *label, *bndind; + graph_t *lgraph, *rgraph; nvtxs = graph->nvtxs; - /* Determine the weights of the partitions */ - tvwgt = idxsum(nvtxs, graph->vwgt); - tpwgts2[0] = tvwgt/2; - tpwgts2[1] = tvwgt-tpwgts2[0]; + MlevelNodeBisectionMultiple(ctrl, graph); - switch (ctrl->optype) { - case OP_OEMETIS: - MlevelEdgeBisection(ctrl, graph, tpwgts2, ubfactor); + IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, + printf("Nvtxs: %6"PRIDX", [%6"PRIDX" %6"PRIDX" %6"PRIDX"]\n", + graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SepTmr)); - ConstructMinCoverSeparator(ctrl, graph, ubfactor); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SepTmr)); - - break; - case OP_ONMETIS: - MlevelNodeBisectionMultiple(ctrl, graph, tpwgts2, ubfactor); - - IFSET(ctrl->dbglvl, DBG_SEPINFO, printf("Nvtxs: %6d, [%6d %6d %6d]\n", graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); - - break; - } /* Order the nodes in the separator */ - nbnd = graph->nbnd; + nbnd = graph->nbnd; bndind = graph->bndind; - label = graph->label; + label = graph->label; for (i=0; igdata, &graph->rdata, &graph->label, LTERM); + FreeGraph(&graph); - if (rgraph.nvtxs > MMDSWITCH) - MlevelNestedDissection(ctrl, &rgraph, order, ubfactor, lastvtx); + /* Recurse on lgraph first, as its lastvtx depends on rgraph->nvtxs, which + will not be defined upon return from MlevelNestedDissection. 
*/ + if (lgraph->nvtxs > MMDSWITCH && lgraph->nedges > 0) + MlevelNestedDissection(ctrl, lgraph, order, lastvtx-rgraph->nvtxs); else { - MMDOrder(ctrl, &rgraph, order, lastvtx); - GKfree((void **)&rgraph.gdata, &rgraph.rdata, &rgraph.label, LTERM); + MMDOrder(ctrl, lgraph, order, lastvtx-rgraph->nvtxs); + FreeGraph(&lgraph); } - if (lgraph.nvtxs > MMDSWITCH) - MlevelNestedDissection(ctrl, &lgraph, order, ubfactor, lastvtx-rgraph.nvtxs); + if (rgraph->nvtxs > MMDSWITCH && rgraph->nedges > 0) + MlevelNestedDissection(ctrl, rgraph, order, lastvtx); else { - MMDOrder(ctrl, &lgraph, order, lastvtx-rgraph.nvtxs); - GKfree((void **)&lgraph.gdata, &lgraph.rdata, &lgraph.label, LTERM); + MMDOrder(ctrl, rgraph, order, lastvtx); + FreeGraph(&rgraph); } } -/************************************************************************* -* This function takes a graph and produces a bisection of it -**************************************************************************/ -void MlevelNestedDissectionCC(CtrlType *ctrl, GraphType *graph, idxtype *order, float ubfactor, int lastvtx) +/*************************************************************************/ +/*! This routine is similar to its non 'CC' counterpart. The difference is + that after each tri-section, the connected components of the original + graph that result after removing the separator vertises are ordered + independently (i.e., this may lead to more than just the left and + the right subgraphs). 
+*/ +/*************************************************************************/ +void MlevelNestedDissectionCC(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx) { - int i, j, nvtxs, nbnd, tvwgt, tpwgts2[2], nsgraphs, ncmps, rnvtxs; - idxtype *label, *bndind; - idxtype *cptr, *cind; - GraphType *sgraphs; + idx_t i, j, nvtxs, nbnd, ncmps, rnvtxs, snvtxs; + idx_t *label, *bndind; + idx_t *cptr, *cind; + graph_t **sgraphs; nvtxs = graph->nvtxs; - /* Determine the weights of the partitions */ - tvwgt = idxsum(nvtxs, graph->vwgt); - tpwgts2[0] = tvwgt/2; - tpwgts2[1] = tvwgt-tpwgts2[0]; + MlevelNodeBisectionMultiple(ctrl, graph); - MlevelNodeBisectionMultiple(ctrl, graph, tpwgts2, ubfactor); - IFSET(ctrl->dbglvl, DBG_SEPINFO, printf("Nvtxs: %6d, [%6d %6d %6d]\n", graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); + IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, + printf("Nvtxs: %6"PRIDX", [%6"PRIDX" %6"PRIDX" %6"PRIDX"]\n", + graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); /* Order the nodes in the separator */ - nbnd = graph->nbnd; + nbnd = graph->nbnd; bndind = graph->bndind; - label = graph->label; + label = graph->label; for (i=0; i 2) - printf("[%5d] has %3d components\n", nvtxs, ncmps); -*/ - - sgraphs = (GraphType *)GKmalloc(ncmps*sizeof(GraphType), "MlevelNestedDissectionCC: sgraphs"); + WCOREPUSH; + cptr = iwspacemalloc(ctrl, nvtxs+1); + cind = iwspacemalloc(ctrl, nvtxs); + ncmps = FindSepInducedComponents(ctrl, graph, cptr, cind); - nsgraphs = SplitGraphOrderCC(ctrl, graph, sgraphs, ncmps, cptr, cind); + if (ctrl->dbglvl&METIS_DBG_INFO) { + if (ncmps > 2) + printf(" Bisection resulted in %"PRIDX" connected components\n", ncmps); + } + + sgraphs = SplitGraphOrderCC(ctrl, graph, ncmps, cptr, cind); - GKfree((void **)&cptr, &cind, LTERM); + WCOREPOP; /* Free the memory of the top level graph */ - GKfree((void **)&graph->gdata, &graph->rdata, &graph->label, LTERM); + FreeGraph(&graph); /* Go and process the subgraphs */ - for 
(rnvtxs=i=0; invtxs; + + if (sgraphs[i]->nvtxs > MMDSWITCH && sgraphs[i]->nedges > 0) { + MlevelNestedDissectionCC(ctrl, sgraphs[i], order, lastvtx-rnvtxs); } else { - MlevelNestedDissectionCC(ctrl, sgraphs+i, order, ubfactor, lastvtx-rnvtxs); + MMDOrder(ctrl, sgraphs[i], order, lastvtx-rnvtxs); + FreeGraph(&sgraphs[i]); } - rnvtxs += sgraphs[i].nvtxs; + rnvtxs += snvtxs; } - GKfree((void **)&sgraphs, LTERM); -} + gk_free((void **)&sgraphs, LTERM); +} -/************************************************************************* -* This function performs multilevel bisection. It performs multiple -* bisections and selects the best. -**************************************************************************/ -void MlevelNodeBisectionMultiple(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor) +/*************************************************************************/ +/*! This function performs multilevel node bisection (i.e., tri-section). + It performs multiple bisections and selects the best. */ +/*************************************************************************/ +void MlevelNodeBisectionMultiple(ctrl_t *ctrl, graph_t *graph) { - int i, nvtxs, cnvtxs, mincut, tmp; - GraphType *cgraph; - idxtype *bestwhere; + idx_t i, mincut; + idx_t *bestwhere; - if (ctrl->nseps == 1 || graph->nvtxs < (ctrl->oflags&OFLAG_COMPRESS ? 1000 : 2000)) { - MlevelNodeBisection(ctrl, graph, tpwgts, ubfactor); + /* if the graph is small, just find a single vertex separator */ + if (ctrl->nseps == 1 || graph->nvtxs < (ctrl->compress ? 
1000 : 2000)) { + MlevelNodeBisectionL2(ctrl, graph, LARGENIPARTS); return; } - nvtxs = graph->nvtxs; - - if (ctrl->oflags&OFLAG_COMPRESS) { /* Multiple separators at the original graph */ - bestwhere = idxmalloc(nvtxs, "MlevelNodeBisection2: bestwhere"); - - for (i=0; inseps; i++) { - MlevelNodeBisection(ctrl, graph, tpwgts, ubfactor); + WCOREPUSH; - /* printf("Nvtxs: %6d, [%6d %6d %6d] [%d %d]\n", graph->nvtxs, - graph->pwgts[0], graph->pwgts[1], graph->pwgts[2], i, ctrl->nseps); */ + bestwhere = iwspacemalloc(ctrl, graph->nvtxs); - if (i == 0 || graph->mincut < mincut) { - mincut = graph->mincut; - idxcopy(nvtxs, graph->where, bestwhere); - } + mincut = graph->tvwgt[0]; + for (i=0; inseps; i++) { + MlevelNodeBisectionL2(ctrl, graph, LARGENIPARTS); - GKfree((void **)&graph->rdata, LTERM); - - if (mincut == 0) - break; + if (i == 0 || graph->mincut < mincut) { + mincut = graph->mincut; + if (i < ctrl->nseps-1) + icopy(graph->nvtxs, graph->where, bestwhere); } - /* printf("[%5d]\n", mincut); */ - Allocate2WayNodePartitionMemory(ctrl, graph); - idxcopy(nvtxs, bestwhere, graph->where); - GKfree((void **)&bestwhere, LTERM); + if (mincut == 0) + break; + if (i < ctrl->nseps-1) + FreeRData(graph); + } + + if (mincut != graph->mincut) { + icopy(graph->nvtxs, bestwhere, graph->where); Compute2WayNodePartitionParams(ctrl, graph); } - else { /* Coarsen it a bit */ - ctrl->CoarsenTo = nvtxs-1; - cgraph = Coarsen2Way(ctrl, graph); + WCOREPOP; +} - cnvtxs = cgraph->nvtxs; - bestwhere = idxmalloc(cnvtxs, "MlevelNodeBisection2: bestwhere"); +/*************************************************************************/ +/*! This function performs multilevel node bisection (i.e., tri-section). + It performs multiple bisections and selects the best. 
*/ +/*************************************************************************/ +void MlevelNodeBisectionL2(ctrl_t *ctrl, graph_t *graph, idx_t niparts) +{ + idx_t i, mincut, nruns=5; + graph_t *cgraph; + idx_t *bestwhere; - for (i=0; inseps; i++) { - ctrl->CType += 20; /* This is a hack. Look at coarsen.c */ - MlevelNodeBisection(ctrl, cgraph, tpwgts, ubfactor); + /* if the graph is small, just find a single vertex separator */ + if (graph->nvtxs < 5000) { + MlevelNodeBisectionL1(ctrl, graph, niparts); + return; + } - /* printf("%5d ", cgraph->mincut); */ + WCOREPUSH; - if (i == 0 || cgraph->mincut < mincut) { - mincut = cgraph->mincut; - idxcopy(cnvtxs, cgraph->where, bestwhere); - } + ctrl->CoarsenTo = gk_max(100, graph->nvtxs/30); - GKfree((void **)&cgraph->rdata, LTERM); - - if (mincut == 0) - break; - } - /* printf("[%5d]\n", mincut); */ + cgraph = CoarsenGraphNlevels(ctrl, graph, 4); + + bestwhere = iwspacemalloc(ctrl, cgraph->nvtxs); + + mincut = graph->tvwgt[0]; + for (i=0; iwhere); - GKfree((void **)&bestwhere, LTERM); + if (i == 0 || cgraph->mincut < mincut) { + mincut = cgraph->mincut; + if (i < nruns-1) + icopy(cgraph->nvtxs, cgraph->where, bestwhere); + } - Compute2WayNodePartitionParams(ctrl, cgraph); + if (mincut == 0) + break; - Refine2WayNode(ctrl, graph, cgraph, ubfactor); + if (i < nruns-1) + FreeRData(cgraph); } + if (mincut != cgraph->mincut) + icopy(cgraph->nvtxs, bestwhere, cgraph->where); + + WCOREPOP; + + Refine2WayNode(ctrl, graph, cgraph); + } -/************************************************************************* -* This function performs multilevel bisection -**************************************************************************/ -void MlevelNodeBisection(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor) +/*************************************************************************/ +/*! 
The top-level routine of the actual multilevel node bisection */ +/*************************************************************************/ +void MlevelNodeBisectionL1(ctrl_t *ctrl, graph_t *graph, idx_t niparts) { - GraphType *cgraph; + graph_t *cgraph; ctrl->CoarsenTo = graph->nvtxs/8; if (ctrl->CoarsenTo > 100) ctrl->CoarsenTo = 100; else if (ctrl->CoarsenTo < 40) - ctrl->CoarsenTo = amin(40, graph->nvtxs-1); - ctrl->maxvwgt = 1.25*((tpwgts[0]+tpwgts[1])/ctrl->CoarsenTo); - - cgraph = Coarsen2Way(ctrl, graph); + ctrl->CoarsenTo = 40; - switch (ctrl->IType) { - case IPART_GGPKL: - Init2WayPartition(ctrl, cgraph, tpwgts, ubfactor); + cgraph = CoarsenGraph(ctrl, graph); - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SepTmr)); - - Compute2WayPartitionParams(ctrl, cgraph); - ConstructSeparator(ctrl, cgraph, ubfactor); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SepTmr)); - break; - case IPART_GGPKLNODE: - InitSeparator(ctrl, cgraph, ubfactor); - break; - } - - Refine2WayNode(ctrl, graph, cgraph, ubfactor); + niparts = gk_max(1, (cgraph->nvtxs <= ctrl->CoarsenTo ? niparts/2: niparts)); + /*niparts = (cgraph->nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS);*/ + InitSeparator(ctrl, cgraph, niparts); + Refine2WayNode(ctrl, graph, cgraph); } - - -/************************************************************************* -* This function takes a graph and a bisection and splits it into two graphs. -* This function relies on the fact that adjwgt is all equal to 1. -**************************************************************************/ -void SplitGraphOrder(CtrlType *ctrl, GraphType *graph, GraphType *lgraph, GraphType *rgraph) +/*************************************************************************/ +/*! This function takes a graph and a tri-section (left, right, separator) + and splits it into two graphs. + + This function relies on the fact that adjwgt is all equal to 1. 
+*/ +/*************************************************************************/ +void SplitGraphOrder(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, + graph_t **r_rgraph) { - int i, ii, j, k, l, istart, iend, mypart, nvtxs, snvtxs[3], snedges[3]; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *adjwgtsum, *label, *where, *bndptr, *bndind; - idxtype *sxadj[2], *svwgt[2], *sadjncy[2], *sadjwgt[2], *sadjwgtsum[2], *slabel[2]; - idxtype *rename; - idxtype *auxadjncy, *auxadjwgt; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SplitTmr)); - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; - label = graph->label; - where = graph->where; - bndptr = graph->bndptr; - bndind = graph->bndind; + idx_t i, ii, j, k, l, istart, iend, mypart, nvtxs, snvtxs[3], snedges[3]; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label, *where, *bndptr, *bndind; + idx_t *sxadj[2], *svwgt[2], *sadjncy[2], *sadjwgt[2], *slabel[2]; + idx_t *rename; + idx_t *auxadjncy; + graph_t *lgraph, *rgraph; + + WCOREPUSH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->SplitTmr)); + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + label = graph->label; + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; ASSERT(bndptr != NULL); - rename = idxwspacemalloc(ctrl, nvtxs); + rename = iwspacemalloc(ctrl, nvtxs); snvtxs[0] = snvtxs[1] = snvtxs[2] = snedges[0] = snedges[1] = snedges[2] = 0; for (i=0; ixadj; - svwgt[0] = lgraph->vwgt; - sadjwgtsum[0] = lgraph->adjwgtsum; - sadjncy[0] = lgraph->adjncy; - sadjwgt[0] = lgraph->adjwgt; - slabel[0] = lgraph->label; - - SetUpSplitGraph(graph, rgraph, snvtxs[1], snedges[1]); - sxadj[1] = rgraph->xadj; - svwgt[1] = rgraph->vwgt; - sadjwgtsum[1] = rgraph->adjwgtsum; - sadjncy[1] = rgraph->adjncy; - sadjwgt[1] = rgraph->adjwgt; - slabel[1] = rgraph->label; + lgraph = 
SetupSplitGraph(graph, snvtxs[0], snedges[0]); + sxadj[0] = lgraph->xadj; + svwgt[0] = lgraph->vwgt; + sadjncy[0] = lgraph->adjncy; + sadjwgt[0] = lgraph->adjwgt; + slabel[0] = lgraph->label; + + rgraph = SetupSplitGraph(graph, snvtxs[1], snedges[1]); + sxadj[1] = rgraph->xadj; + svwgt[1] = rgraph->vwgt; + sadjncy[1] = rgraph->adjncy; + sadjwgt[1] = rgraph->adjwgt; + slabel[1] = rgraph->label; /* Go and use bndptr to also mark the boundary nodes in the two partitions */ for (ii=0; iinbnd; ii++) { @@ -569,7 +481,7 @@ void SplitGraphOrder(CtrlType *ctrl, GraphType *graph, GraphType *lgraph, GraphT continue; istart = xadj[i]; - iend = xadj[i+1]; + iend = xadj[i+1]; if (bndptr[i] == -1) { /* This is an interior vertex */ auxadjncy = sadjncy[mypart] + snedges[mypart] - istart; for(j=istart; jnvtxs = snvtxs[0]; + lgraph->nvtxs = snvtxs[0]; lgraph->nedges = snedges[0]; - rgraph->nvtxs = snvtxs[1]; + rgraph->nvtxs = snvtxs[1]; rgraph->nedges = snedges[1]; - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SplitTmr)); - - idxwspacefree(ctrl, nvtxs); + SetupGraph_tvwgt(lgraph); + SetupGraph_tvwgt(rgraph); -} - - -/************************************************************************* -* This function uses MMD to order the graph. 
The vertices are numbered -* from lastvtx downwards -**************************************************************************/ -void MMDOrder(CtrlType *ctrl, GraphType *graph, idxtype *order, int lastvtx) -{ - int i, j, k, nvtxs, nofsub, firstvtx; - idxtype *xadj, *adjncy, *label; - idxtype *perm, *iperm, *head, *qsize, *list, *marker; + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->SplitTmr)); - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; + *r_lgraph = lgraph; + *r_rgraph = rgraph; - /* Relabel the vertices so that it starts from 1 */ - k = xadj[nvtxs]; - for (i=0; ilabel; - firstvtx = lastvtx-nvtxs; - for (i=0; idbglvl, DBG_TIME, starttimer(ctrl->SplitTmr)); - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; - label = graph->label; - where = graph->where; - bndptr = graph->bndptr; - bndind = graph->bndind; + idx_t i, ii, iii, j, k, l, istart, iend, mypart, nvtxs, snvtxs, snedges; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label, *where, *bndptr, *bndind; + idx_t *sxadj, *svwgt, *sadjncy, *sadjwgt, *slabel; + idx_t *rename; + idx_t *auxadjncy; + graph_t **sgraphs; + + WCOREPUSH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->SplitTmr)); + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + label = graph->label; + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; ASSERT(bndptr != NULL); /* Go and use bndptr to also mark the boundary nodes in the two partitions */ @@ -693,11 +581,13 @@ int SplitGraphOrderCC(CtrlType *ctrl, GraphType *graph, GraphType *sgraphs, int bndptr[adjncy[j]] = 1; } - rename = idxwspacemalloc(ctrl, nvtxs); + rename = iwspacemalloc(ctrl, nvtxs); + sgraphs = (graph_t **)gk_malloc(sizeof(graph_t *)*ncmps, "SplitGraphOrderCC: sgraphs"); + /* Go and split the graph a component at a time */ for (iii=0; 
iiixadj; + svwgt = sgraphs[iii]->vwgt; + sadjncy = sgraphs[iii]->adjncy; + sadjwgt = sgraphs[iii]->adjwgt; + slabel = sgraphs[iii]->label; snvtxs = snedges = sxadj[0] = 0; for (ii=cptr[iii]; iinvtxs = snvtxs; + sgraphs[iii]->nedges = snedges; - if (snvtxs < MMDSWITCH) - sgraphs[iii].adjwgt = NULL; /* A marker to call MMD on the driver */ + SetupGraph_tvwgt(sgraphs[iii]); } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SplitTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->SplitTmr)); + + WCOREPOP; + + return sgraphs; +} + + +/*************************************************************************/ +/*! This function uses MMD to order the graph. The vertices are numbered + from lastvtx downwards. */ +/*************************************************************************/ +void MMDOrder(ctrl_t *ctrl, graph_t *graph, idx_t *order, idx_t lastvtx) +{ + idx_t i, j, k, nvtxs, nofsub, firstvtx; + idx_t *xadj, *adjncy, *label; + idx_t *perm, *iperm, *head, *qsize, *list, *marker; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* Relabel the vertices so that it starts from 1 */ + k = xadj[nvtxs]; + for (i=0; ilabel; + firstvtx = lastvtx-nvtxs; + for (i=0; ipid = getpid(); + + switch (optype) { + case METIS_OP_PMETIS: + ctrl->objtype = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT); + ctrl->rtype = METIS_RTYPE_FM; + ctrl->ncuts = GETOPTION(options, METIS_OPTION_NCUTS, 1); + ctrl->niter = GETOPTION(options, METIS_OPTION_NITER, 10); + + if (ncon == 1) { + ctrl->iptype = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_GROW); + ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, PMETIS_DEFAULT_UFACTOR); + ctrl->CoarsenTo = 20; + } + else { + ctrl->iptype = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_RANDOM); + ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, MCPMETIS_DEFAULT_UFACTOR); + ctrl->CoarsenTo = 100; + } + + break; + + + case METIS_OP_KMETIS: + ctrl->objtype = 
GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT); + ctrl->iptype = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_METISRB); + ctrl->rtype = METIS_RTYPE_GREEDY; + ctrl->nIparts = GETOPTION(options, METIS_OPTION_NIPARTS, -1); + ctrl->ncuts = GETOPTION(options, METIS_OPTION_NCUTS, 1); + ctrl->niter = GETOPTION(options, METIS_OPTION_NITER, 10); + ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, KMETIS_DEFAULT_UFACTOR); + ctrl->minconn = GETOPTION(options, METIS_OPTION_MINCONN, 0); + ctrl->contig = GETOPTION(options, METIS_OPTION_CONTIG, 0); + break; + + + case METIS_OP_OMETIS: + ctrl->objtype = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_NODE); + ctrl->rtype = GETOPTION(options, METIS_OPTION_RTYPE, METIS_RTYPE_SEP1SIDED); + ctrl->iptype = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_EDGE); + ctrl->nseps = GETOPTION(options, METIS_OPTION_NSEPS, 1); + ctrl->niter = GETOPTION(options, METIS_OPTION_NITER, 10); + ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, OMETIS_DEFAULT_UFACTOR); + ctrl->compress = GETOPTION(options, METIS_OPTION_COMPRESS, 1); + ctrl->ccorder = GETOPTION(options, METIS_OPTION_CCORDER, 0); + ctrl->pfactor = 0.1*GETOPTION(options, METIS_OPTION_PFACTOR, 0); + + ctrl->CoarsenTo = 100; + break; + + default: + gk_errexit(SIGERR, "Unknown optype of %d\n", optype); + } + + /* common options */ + ctrl->ctype = GETOPTION(options, METIS_OPTION_CTYPE, METIS_CTYPE_SHEM); + ctrl->no2hop = GETOPTION(options, METIS_OPTION_NO2HOP, 0); + ctrl->ondisk = GETOPTION(options, METIS_OPTION_ONDISK, 0); + ctrl->seed = GETOPTION(options, METIS_OPTION_SEED, -1); + ctrl->dbglvl = GETOPTION(options, METIS_OPTION_DBGLVL, 0); + ctrl->numflag = GETOPTION(options, METIS_OPTION_NUMBERING, 0); + ctrl->dropedges = GETOPTION(options, METIS_OPTION_DROPEDGES, 0); + + /* set non-option information */ + ctrl->optype = optype; + ctrl->ncon = ncon; + ctrl->nparts = nparts; + ctrl->maxvwgt = ismalloc(ncon, 0, "SetupCtrl: maxvwgt"); + + /* setup 
the target partition weights */ + if (ctrl->optype != METIS_OP_OMETIS) { + ctrl->tpwgts = rsmalloc(nparts*ncon, 0.0, "SetupCtrl: ctrl->tpwgts"); + if (tpwgts) { + rcopy(nparts*ncon, tpwgts, ctrl->tpwgts); + } + else { + for (i=0; itpwgts[i*ncon+j] = 1.0/nparts; + } + } + } + else { /* METIS_OP_OMETIS */ + /* this is required to allow the pijbm to be defined properly for + the edge-based refinement during initial partitioning */ + ctrl->tpwgts = rsmalloc(2, .5, "SetupCtrl: ctrl->tpwgts"); + } + + + /* setup the ubfactors */ + ctrl->ubfactors = rsmalloc(ctrl->ncon, I2RUBFACTOR(ctrl->ufactor), "SetupCtrl: ubfactors"); + if (ubvec) + rcopy(ctrl->ncon, ubvec, ctrl->ubfactors); + for (i=0; incon; i++) + ctrl->ubfactors[i] += 0.0000499; + + /* Allocate memory for balance multipliers. + Note that for PMETIS/OMETIS routines the memory allocated is more + than required as balance multipliers for 2 parts is sufficient. */ + ctrl->pijbm = rmalloc(nparts*ncon, "SetupCtrl: ctrl->pijbm"); + + InitRandom(ctrl->seed); + + IFSET(ctrl->dbglvl, METIS_DBG_INFO, PrintCtrl(ctrl)); + + if (!CheckParams(ctrl)) { + FreeCtrl(&ctrl); + return NULL; + } + else { + return ctrl; + } +} + + +/*************************************************************************/ +/*! Computes the per-partition/constraint balance multipliers */ +/*************************************************************************/ +void SetupKWayBalMultipliers(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, j; + + for (i=0; inparts; i++) { + for (j=0; jncon; j++) + ctrl->pijbm[i*graph->ncon+j] = graph->invtvwgt[j]/ctrl->tpwgts[i*graph->ncon+j]; + } +} + + +/*************************************************************************/ +/*! 
Computes the per-partition/constraint balance multipliers */ +/*************************************************************************/ +void Setup2WayBalMultipliers(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts) +{ + idx_t i, j; + + for (i=0; i<2; i++) { + for (j=0; jncon; j++) + ctrl->pijbm[i*graph->ncon+j] = graph->invtvwgt[j]/tpwgts[i*graph->ncon+j]; + } +} + + +/*************************************************************************/ +/*! This function prints the various control fields */ +/*************************************************************************/ +void PrintCtrl(ctrl_t *ctrl) +{ + idx_t i, j, modnum; + + printf(" Runtime parameters:\n"); + + printf(" Objective type: "); + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + printf("METIS_OBJTYPE_CUT\n"); + break; + case METIS_OBJTYPE_VOL: + printf("METIS_OBJTYPE_VOL\n"); + break; + case METIS_OBJTYPE_NODE: + printf("METIS_OBJTYPE_NODE\n"); + break; + default: + printf("Unknown!\n"); + } + + printf(" Coarsening type: "); + switch (ctrl->ctype) { + case METIS_CTYPE_RM: + printf("METIS_CTYPE_RM\n"); + break; + case METIS_CTYPE_SHEM: + printf("METIS_CTYPE_SHEM\n"); + break; + default: + printf("Unknown!\n"); + } + + printf(" Initial partitioning type: "); + switch (ctrl->iptype) { + case METIS_IPTYPE_GROW: + printf("METIS_IPTYPE_GROW\n"); + break; + case METIS_IPTYPE_RANDOM: + printf("METIS_IPTYPE_RANDOM\n"); + break; + case METIS_IPTYPE_EDGE: + printf("METIS_IPTYPE_EDGE\n"); + break; + case METIS_IPTYPE_NODE: + printf("METIS_IPTYPE_NODE\n"); + break; + case METIS_IPTYPE_METISRB: + printf("METIS_IPTYPE_METISRB\n"); + break; + default: + printf("Unknown!\n"); + } + + printf(" Refinement type: "); + switch (ctrl->rtype) { + case METIS_RTYPE_FM: + printf("METIS_RTYPE_FM\n"); + break; + case METIS_RTYPE_GREEDY: + printf("METIS_RTYPE_GREEDY\n"); + break; + case METIS_RTYPE_SEP2SIDED: + printf("METIS_RTYPE_SEP2SIDED\n"); + break; + case METIS_RTYPE_SEP1SIDED: + printf("METIS_RTYPE_SEP1SIDED\n"); + 
break; + default: + printf("Unknown!\n"); + } + + printf(" Perform a 2-hop matching: %s\n", (ctrl->no2hop ? "No" : "Yes")); + + printf(" On disk storage: %s\n", (ctrl->ondisk ? "Yes" : "No")); + printf(" Drop edges: %s\n", (ctrl->dropedges ? "Yes" : "No")); + + printf(" Number of balancing constraints: %"PRIDX"\n", ctrl->ncon); + printf(" Number of refinement iterations: %"PRIDX"\n", ctrl->niter); + printf(" Number of initial partitionings: %"PRIDX"\n", ctrl->nIparts); + printf(" Random number seed: %"PRIDX"\n", ctrl->seed); + + if (ctrl->optype == METIS_OP_OMETIS) { + printf(" Number of separators: %"PRIDX"\n", ctrl->nseps); + printf(" Compress graph prior to ordering: %s\n", (ctrl->compress ? "Yes" : "No")); + printf(" Detect & order connected components separately: %s\n", (ctrl->ccorder ? "Yes" : "No")); + printf(" Prunning factor for high degree vertices: %"PRREAL"\n", ctrl->pfactor); + } + else { + printf(" Number of partitions: %"PRIDX"\n", ctrl->nparts); + printf(" Number of cuts: %"PRIDX"\n", ctrl->ncuts); + printf(" User-supplied ufactor: %"PRIDX"\n", ctrl->ufactor); + + if (ctrl->optype == METIS_OP_KMETIS) { + printf(" Minimize connectivity: %s\n", (ctrl->minconn ? "Yes" : "No")); + printf(" Create contiguous partitions: %s\n", (ctrl->contig ? "Yes" : "No")); + } + + modnum = (ctrl->ncon==1 ? 5 : (ctrl->ncon==2 ? 3 : (ctrl->ncon==3 ? 2 : 1))); + printf(" Target partition weights: "); + for (i=0; inparts; i++) { + if (i%modnum == 0) + printf("\n "); + printf("%4"PRIDX"=[", i); + for (j=0; jncon; j++) + printf("%s%.2e", (j==0 ? "" : " "), (double)ctrl->tpwgts[i*ctrl->ncon+j]); + printf("]"); + } + printf("\n"); + } + + printf(" Allowed maximum load imbalance: "); + for (i=0; incon; i++) + printf("%.3"PRREAL" ", ctrl->ubfactors[i]); + printf("\n"); + + printf("\n"); +} + + +/*************************************************************************/ +/*! 
This function checks the validity of user-supplied parameters for the operation in ctrl->optype; it returns 1 when every check passes and 0 at the first one that fails */
+/*************************************************************************/
+int CheckParams(ctrl_t *ctrl)
+{
+  idx_t i, j;
+  real_t sum;
+  mdbglvl_et dbglvl=METIS_DBG_INFO;  /* local, fixed level handed to every IFSET below */
+
+  switch (ctrl->optype) {
+    case METIS_OP_PMETIS:
+      if (ctrl->objtype != METIS_OBJTYPE_CUT) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
+        return 0;
+      }
+      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
+        return 0;
+      }
+      if (ctrl->iptype != METIS_IPTYPE_GROW && ctrl->iptype != METIS_IPTYPE_RANDOM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
+        return 0;
+      }
+      if (ctrl->rtype != METIS_RTYPE_FM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
+        return 0;
+      }
+      if (ctrl->ncuts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncuts.\n"));
+        return 0;
+      }
+      if (ctrl->niter <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
+        return 0;
+      }
+      if (ctrl->ufactor <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
+        return 0;
+      }
+      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
+        return 0;
+      }
+      if (ctrl->nparts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
+        return 0;
+      }
+      if (ctrl->ncon <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
+        return 0;
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        sum = rsum(ctrl->nparts, ctrl->tpwgts+i, ctrl->ncon);
+        if (sum < 0.99 || sum > 1.01) {  /* per-constraint total must be 1 within 1% */
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect sum of %"PRREAL" for tpwgts for constraint %"PRIDX".\n", sum, i));
+          return 0;
+        }
+      }
+      for (i=0; i<ctrl->ncon; i++) {
+        for (j=0; j<ctrl->nparts; j++) {
+          if (ctrl->tpwgts[j*ctrl->ncon+i] <= 0.0) {
+            IFSET(dbglvl, METIS_DBG_INFO,
+                printf("Input Error: Incorrect tpwgts for partition %"PRIDX" and constraint %"PRIDX".\n", j, i));
+            return 0;
+          }
+        }
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        if (ctrl->ubfactors[i] <= 1.0) {
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
+          return 0;
+        }
+      }
+
+      break;
+
+    case METIS_OP_KMETIS:
+      if (ctrl->objtype != METIS_OBJTYPE_CUT && ctrl->objtype != METIS_OBJTYPE_VOL) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
+        return 0;
+      }
+      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
+        return 0;
+      }
+      if (ctrl->iptype != METIS_IPTYPE_METISRB && ctrl->iptype != METIS_IPTYPE_GROW) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
+        return 0;
+      }
+      if (ctrl->rtype != METIS_RTYPE_GREEDY) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
+        return 0;
+      }
+      if (ctrl->ncuts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncuts.\n"));
+        return 0;
+      }
+      if (ctrl->niter <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
+        return 0;
+      }
+      if (ctrl->ufactor <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
+        return 0;
+      }
+      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
+        return 0;
+      }
+      if (ctrl->nparts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
+        return 0;
+      }
+      if (ctrl->ncon <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
+        return 0;
+      }
+      if (ctrl->contig != 0 && ctrl->contig != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect contig.\n"));
+        return 0;
+      }
+      if (ctrl->minconn != 0 && ctrl->minconn != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect minconn.\n"));
+        return 0;
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        sum = rsum(ctrl->nparts, ctrl->tpwgts+i, ctrl->ncon);
+        if (sum < 0.99 || sum > 1.01) {  /* per-constraint total must be 1 within 1% */
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect sum of %"PRREAL" for tpwgts for constraint %"PRIDX".\n", sum, i));
+          return 0;
+        }
+      }
+      for (i=0; i<ctrl->ncon; i++) {
+        for (j=0; j<ctrl->nparts; j++) {
+          if (ctrl->tpwgts[j*ctrl->ncon+i] <= 0.0) {
+            IFSET(dbglvl, METIS_DBG_INFO,
+                printf("Input Error: Incorrect tpwgts for partition %"PRIDX" and constraint %"PRIDX".\n", j, i));
+            return 0;
+          }
+        }
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        if (ctrl->ubfactors[i] <= 1.0) {
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
+          return 0;
+        }
+      }
+
+      break;
+
+
+
+    case METIS_OP_OMETIS:
+      if (ctrl->objtype != METIS_OBJTYPE_NODE) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
+        return 0;
+      }
+      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
+        return 0;
+      }
+      if (ctrl->iptype != METIS_IPTYPE_EDGE && ctrl->iptype != METIS_IPTYPE_NODE) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
+        return 0;
+      }
+      if (ctrl->rtype != METIS_RTYPE_SEP1SIDED && ctrl->rtype != METIS_RTYPE_SEP2SIDED) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
+        return 0;
+      }
+      if (ctrl->nseps <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nseps.\n"));
+        return 0;
+      }
+      if (ctrl->niter <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
+        return 0;
+      }
+      if (ctrl->ufactor <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
+        return 0;
+      }
+      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
+        return 0;
+      }
+      if (ctrl->nparts != 3) {  /* NOTE(review): presumably the two sides plus the separator -- confirm */
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
+        return 0;
+      }
+      if (ctrl->ncon != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
+        return 0;
+      }
+      if (ctrl->compress != 0 && ctrl->compress != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect compress.\n"));
+        return 0;
+      }
+      if (ctrl->ccorder != 0 && ctrl->ccorder != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ccorder.\n"));
+        return 0;
+      }
+      if (ctrl->pfactor < 0.0 ) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect pfactor.\n"));
+        return 0;
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        if (ctrl->ubfactors[i] <= 1.0) {
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
+          return 0;
+        }
+      }
+
+      break;
+
+    default:
+      IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect optype\n"));
+      return 0;
+  }
+
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! 
This function frees the memory associated with a ctrl_t */ +/*************************************************************************/ +void FreeCtrl(ctrl_t **r_ctrl) +{ + ctrl_t *ctrl = *r_ctrl; + + FreeWorkSpace(ctrl); + + gk_free((void **)&ctrl->tpwgts, &ctrl->pijbm, + &ctrl->ubfactors, &ctrl->maxvwgt, &ctrl, LTERM); /* the field addresses point into *ctrl, so &ctrl is listed last; presumably gk_free releases in argument order -- confirm */ + + *r_ctrl = NULL; /* clear the caller's pointer to the freed ctrl_t */ +} + + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/out b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/out deleted file mode 100644 index e848d8f4..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/out +++ /dev/null @@ -1,107 +0,0 @@ -checkgraph.c: if (adjwgt != NULL && adjwgt[l] != adjwgt[j]) { -debug.c: if (graph->adjwgt == NULL) { -graph.c: graph->vwgt = NULL; -graph.c: graph->adjwgt = NULL; -kwayfm.c: if (myrinfo->edegrees == NULL) { -kwayfm.c: if (myrinfo->edegrees == NULL) { -kwayfm.c: if (myrinfo->edegrees == NULL) { -kwayrefine.c: if (graph->vwgt == NULL) { -kwayrefine.c: if (ctrl->wspace.edegrees != NULL) -kwayrefine.c: myrinfo->edegrees = NULL; -kwayrefine.c: myrinfo->edegrees = NULL; -kwayrefine.c: myrinfo->edegrees = NULL; -kwayrefine.c: graph->coarser = NULL; -kwayvolfm.c: if (myrinfo->edegrees == NULL) { -kwayvolfm.c: vsize = (graph->vsize == NULL ? 
graph->vwgt : graph->vsize); -kwayvolrefine.c: MALLOC_CHECK(NULL); -kwayvolrefine.c: myrinfo->edegrees = NULL; -kwayvolrefine.c: myrinfo->edegrees = NULL; -kwayvolrefine.c: myrinfo->edegrees = NULL; -kwayvolrefine.c: graph->coarser = NULL; -memory.c: ctrl->wspace.pmat = NULL; -memory.c: ctrl->wspace.vedegrees = NULL; -memory.c: ctrl->wspace.edegrees = NULL; -memory.c: ctrl->wspace.vedegrees = NULL; -memory.c: graph->gdata = graph->rdata = NULL; -memory.c: graph->xadj = graph->vwgt = graph->adjncy = graph->adjwgt = NULL; -memory.c: graph->adjwgtsum = NULL; -memory.c: graph->label = NULL; -memory.c: graph->cmap = NULL; -memory.c: graph->where = graph->pwgts = NULL; -memory.c: graph->id = graph->ed = NULL; -memory.c: graph->bndptr = graph->bndind = NULL; -memory.c: graph->rinfo = NULL; -memory.c: graph->vrinfo = NULL; -memory.c: graph->nrinfo = NULL; -memory.c: graph->nvwgt = NULL; -memory.c: graph->npwgts = NULL; -memory.c: graph->vsize = NULL; -memory.c: graph->coarser = graph->finer = NULL; -meshpart.c: METIS_PartGraphKway(nn, xadj, adjncy, NULL, NULL, &wgtflag, &pnumflag, nparts, options, edgecut, npart); -meshpart.c: METIS_PartGraphKway(ne, xadj, adjncy, NULL, NULL, &wgtflag, &pnumflag, nparts, options, edgecut, epart); -mkwayfmh.c: if (myrinfo->edegrees == NULL) { -mkwayfmh.c: if (myrinfo->edegrees == NULL) { -mkwayrefine.c: myrinfo->edegrees = NULL; -mkwayrefine.c: myrinfo->edegrees = NULL; -mkwayrefine.c: myrinfo->edegrees = NULL; -mkwayrefine.c: graph->coarser = NULL; -mpmetis.c: lubvec = rubvec = NULL; -mrefine.c: graph->coarser = NULL; -ometis.c: SetUpGraph(&graph, OP_OEMETIS, *nvtxs, 1, xadj, adjncy, NULL, NULL, 0); -ometis.c: SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, NULL, NULL, 0); -ometis.c: SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, NULL, 2); -ometis.c: if (sgraphs[i].adjwgt == NULL) { -ometis.c: ASSERT(bndptr != NULL); -ometis.c: ASSERT(bndptr != NULL); -ometis.c: sgraphs[iii].adjwgt = NULL; /* A marker to call MMD on 
the driver */ -parmetis.c: SetUpGraph(&graph, OP_ONMETIS, nvtxs, 1, xadj, adjncy, NULL, NULL, 0); -parmetis.c: graph.npwgts = NULL; -pmetis.c: ASSERT(bndptr != NULL); -pqueue.c: queue->buckets = NULL; -pqueue.c: queue->nodes = NULL; -pqueue.c: queue->heap = NULL; -pqueue.c: queue->locator = NULL; -pqueue.c: queue->buckets[i] = NULL; -pqueue.c: queue->buckets[i] = NULL; -pqueue.c: newnode->prev = NULL; -pqueue.c: if (newnode->next != NULL) -pqueue.c: if (newnode->prev != NULL) -pqueue.c: if (newnode->next != NULL) -pqueue.c: if (buckets[gain] == NULL && gain == queue->maxgain) { -pqueue.c: for (; buckets[queue->maxgain]==NULL; queue->maxgain--); -pqueue.c: if (newnode->prev != NULL) -pqueue.c: if (newnode->next != NULL) -pqueue.c: newnode->prev = NULL; -pqueue.c: if (newnode->next != NULL) -pqueue.c: if (tptr->next != NULL) { -pqueue.c: tptr->next->prev = NULL; -pqueue.c: for (; queue->buckets[queue->maxgain]==NULL; queue->maxgain--); -refine.c: graph->coarser = NULL; -srefine.c: graph->coarser = NULL; -stat.c: if (vwgt == NULL) { -stat.c: if (adjwgt == NULL) { -stat.c: IsConnectedSubdomain(NULL, graph, i, 1); -stat.c: graph->vwgt = NULL; -stat.c: graph->adjwgt = NULL; -stat.c: if (vwgt == NULL) { -stat.c: if (adjwgt == NULL) { -stat.c: graph->vwgt = NULL; -stat.c: graph->adjwgt = NULL; -stat.c: if (vwgt == NULL && ncon == 1) { -subdomains.c: if (myrinfo->edegrees == NULL) { -subdomains.c: if (myrinfo->edegrees == NULL) { -subdomains.c: if (myrinfo->edegrees == NULL) { -subdomains.c: if (myrinfo->edegrees == NULL) { -subdomains.c: if (myrinfo->edegrees == NULL) { -subdomains.c: if (myrinfo->edegrees == NULL) { -util.c: return NULL; -util.c: return NULL; -util.c: return NULL; -util.c: return NULL; -util.c: return NULL; -util.c: return NULL; -util.c: if (ptr == NULL) -util.c: if (*ptr1 != NULL) -util.c: *ptr1 = NULL; -util.c: if (*ptr != NULL) -util.c: *ptr = NULL; diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/parmetis.c 
b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/parmetis.c index 2951c167..95ad901f 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/parmetis.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/parmetis.c @@ -8,534 +8,226 @@ * Started 10/14/97 * George * - * $Id: parmetis.c,v 1.2 2003/07/24 18:39:11 karypis Exp $ + * $Id: parmetis.c 10481 2011-07-05 18:01:23Z karypis $ * */ -#include +#include "metislib.h" -/************************************************************************* -* This function is the entry point for KMETIS with seed specification -* in options[7] -**************************************************************************/ -void METIS_PartGraphKway2(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, - int *options, int *edgecut, idxtype *part) -{ - int i; - float *tpwgts; - - tpwgts = fmalloc(*nparts, "KMETIS: tpwgts"); - for (i=0; i<*nparts; i++) - tpwgts[i] = 1.0/(1.0*(*nparts)); - - METIS_WPartGraphKway2(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, - tpwgts, options, edgecut, part); - GKfree((void **)&tpwgts, LTERM); -} - - -/************************************************************************* -* This function is the entry point for KWMETIS with seed specification -* in options[7] -**************************************************************************/ -void METIS_WPartGraphKway2(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, - float *tpwgts, int *options, int *edgecut, idxtype *part) +/*************************************************************************/ +/*! This function is the entry point for the node ND code for ParMETIS. + The difference between this routine and the standard METIS_NodeND are + the following + + - It performs at least log2(npes) levels of nested dissection. 
+ - It stores the size of the log2(npes) top-level separators in the + sizes array. +*/ +/*************************************************************************/ +int METIS_NodeNDP(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t npes, idx_t *options, idx_t *perm, idx_t *iperm, idx_t *sizes) { - int i, j; - GraphType graph; - CtrlType ctrl; - - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - - SetUpGraph(&graph, OP_KMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, *wgtflag); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = KMETIS_CTYPE; - ctrl.IType = KMETIS_ITYPE; - ctrl.RType = KMETIS_RTYPE; - ctrl.dbglvl = KMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; + idx_t i, ii, j, l, nnvtxs=0; + graph_t *graph; + ctrl_t *ctrl; + idx_t *cptr, *cind; + + ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL); + if (!ctrl) return METIS_ERROR_INPUT; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr)); + + /* compress the graph; not that compression only happens if not prunning + has taken place. */ + if (ctrl->compress) { + cptr = imalloc(nvtxs+1, "OMETIS: cptr"); + cind = imalloc(nvtxs, "OMETIS: cind"); + + graph = CompressGraph(ctrl, nvtxs, xadj, adjncy, vwgt, cptr, cind); + if (graph == NULL) { + /* if there was no compression, cleanup the compress flag */ + gk_free((void **)&cptr, &cind, LTERM); + ctrl->compress = 0; + } + else { + nnvtxs = graph->nvtxs; + } } - ctrl.optype = OP_KMETIS; - ctrl.CoarsenTo = 20*(*nparts); - ctrl.maxvwgt = 1.5*((graph.vwgt ? 
idxsum(*nvtxs, graph.vwgt) : (*nvtxs))/ctrl.CoarsenTo); - - InitRandom(options[7]); - - AllocateWorkSpace(&ctrl, &graph, *nparts); - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); + /* if no compression, setup the graph in the normal way. */ + if (ctrl->compress == 0) + graph = SetupGraph(ctrl, nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL); - *edgecut = MlevelKWayPartitioning(&ctrl, &graph, *nparts, part, tpwgts, 1.000); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); + /* allocate workspace memory */ + AllocateWorkSpace(ctrl, graph); - FreeWorkSpace(&ctrl, &graph); - if (*numflag == 1) - Change2FNumbering(*nvtxs, xadj, adjncy, part); -} - - -/************************************************************************* -* This function is the entry point for the node ND code for ParMETIS -**************************************************************************/ -void METIS_NodeNDP(int nvtxs, idxtype *xadj, idxtype *adjncy, int npes, - int *options, idxtype *perm, idxtype *iperm, idxtype *sizes) -{ - int i, ii, j, l, wflag, nflag; - GraphType graph; - CtrlType ctrl; - idxtype *cptr, *cind; - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = ONMETIS_CTYPE; - ctrl.IType = ONMETIS_ITYPE; - ctrl.RType = ONMETIS_RTYPE; - ctrl.dbglvl = ONMETIS_DBGLVL; - ctrl.oflags = ONMETIS_OFLAGS; - ctrl.pfactor = ONMETIS_PFACTOR; - ctrl.nseps = ONMETIS_NSEPS; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - ctrl.oflags = options[OPTION_OFLAGS]; - ctrl.pfactor = options[OPTION_PFACTOR]; - ctrl.nseps = options[OPTION_NSEPS]; - } - if (ctrl.nseps < 1) - ctrl.nseps = 1; - - ctrl.optype = OP_ONMETIS; - ctrl.CoarsenTo = 100; - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); 
+ /* do the nested dissection ordering */ + iset(2*npes-1, 0, sizes); + MlevelNestedDissectionP(ctrl, graph, iperm, graph->nvtxs, npes, 0, sizes); - InitRandom(-1); - if (ctrl.oflags&OFLAG_COMPRESS) { - /*============================================================ - * Compress the graph - ==============================================================*/ - cptr = idxmalloc(nvtxs+1, "ONMETIS: cptr"); - cind = idxmalloc(nvtxs, "ONMETIS: cind"); - - CompressGraph(&ctrl, &graph, nvtxs, xadj, adjncy, cptr, cind); - - if (graph.nvtxs >= COMPRESSION_FRACTION*(nvtxs)) { - ctrl.oflags--; /* We actually performed no compression */ - GKfree((void **)&cptr, &cind, LTERM); + /* Uncompress the ordering */ + if (ctrl->compress) { + /* construct perm from iperm */ + for (i=0; idbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl)); + /* clean up */ + FreeCtrl(&ctrl); + + return METIS_OK; } -/************************************************************************* -* This function takes a graph and produces a bisection of it -**************************************************************************/ -void MlevelNestedDissectionP(CtrlType *ctrl, GraphType *graph, idxtype *order, int lastvtx, - int npes, int cpos, idxtype *sizes) +/*************************************************************************/ +/*! 
This function is similar to MlevelNestedDissection with the difference + that it also records separator sizes for the top log2(npes) levels */ +/**************************************************************************/ +void MlevelNestedDissectionP(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx, idx_t npes, idx_t cpos, idx_t *sizes) { - int i, j, nvtxs, nbnd, tvwgt, tpwgts2[2]; - idxtype *label, *bndind; - GraphType lgraph, rgraph; - float ubfactor; + idx_t i, j, nvtxs, nbnd; + idx_t *label, *bndind; + graph_t *lgraph, *rgraph; nvtxs = graph->nvtxs; if (nvtxs == 0) { - GKfree((void **)&graph->gdata, &graph->rdata, &graph->label, LTERM); + FreeGraph(&graph); return; } - /* Determine the weights of the partitions */ - tvwgt = idxsum(nvtxs, graph->vwgt); - tpwgts2[0] = tvwgt/2; - tpwgts2[1] = tvwgt-tpwgts2[0]; + MlevelNodeBisectionMultiple(ctrl, graph); - if (cpos >= npes-1) - ubfactor = ORDER_UNBALANCE_FRACTION; - else - ubfactor = 1.05; - - - MlevelNodeBisectionMultiple(ctrl, graph, tpwgts2, ubfactor); - - IFSET(ctrl->dbglvl, DBG_SEPINFO, printf("Nvtxs: %6d, [%6d %6d %6d]\n", graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); + IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, + printf("Nvtxs: %6"PRIDX", [%6"PRIDX" %6"PRIDX" %6"PRIDX"]\n", + graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); if (cpos < npes-1) { - sizes[2*npes-2-cpos] = graph->pwgts[2]; + sizes[2*npes-2-cpos] = graph->pwgts[2]; sizes[2*npes-2-(2*cpos+1)] = graph->pwgts[1]; sizes[2*npes-2-(2*cpos+2)] = graph->pwgts[0]; } /* Order the nodes in the separator */ - nbnd = graph->nbnd; + nbnd = graph->nbnd; bndind = graph->bndind; - label = graph->label; + label = graph->label; for (i=0; igdata, &graph->rdata, &graph->label, LTERM); + FreeGraph(&graph); - if (rgraph.nvtxs > MMDSWITCH || 2*cpos+1 < npes-1) - MlevelNestedDissectionP(ctrl, &rgraph, order, lastvtx, npes, 2*cpos+1, sizes); + if ((lgraph->nvtxs > MMDSWITCH || 2*cpos+2 < npes-1) && lgraph->nedges > 0) + 
MlevelNestedDissectionP(ctrl, lgraph, order, lastvtx-rgraph->nvtxs, npes, 2*cpos+2, sizes); else { - MMDOrder(ctrl, &rgraph, order, lastvtx); - GKfree((void **)&rgraph.gdata, &rgraph.rdata, &rgraph.label, LTERM); + MMDOrder(ctrl, lgraph, order, lastvtx-rgraph->nvtxs); + FreeGraph(&lgraph); } - if (lgraph.nvtxs > MMDSWITCH || 2*cpos+2 < npes-1) - MlevelNestedDissectionP(ctrl, &lgraph, order, lastvtx-rgraph.nvtxs, npes, 2*cpos+2, sizes); + if ((rgraph->nvtxs > MMDSWITCH || 2*cpos+1 < npes-1) && rgraph->nedges > 0) + MlevelNestedDissectionP(ctrl, rgraph, order, lastvtx, npes, 2*cpos+1, sizes); else { - MMDOrder(ctrl, &lgraph, order, lastvtx-rgraph.nvtxs); - GKfree((void **)&lgraph.gdata, &lgraph.rdata, &lgraph.label, LTERM); + MMDOrder(ctrl, rgraph, order, lastvtx); + FreeGraph(&rgraph); } } -/************************************************************************* -* This function is the entry point for ONWMETIS. It requires weights on the -* vertices. It is for the case that the matrix has been pre-compressed. -**************************************************************************/ -void METIS_NodeComputeSeparator(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, float *ubfactor, int *options, int *sepsize, idxtype *part) +/*************************************************************************/ +/*! 
This function bisects a graph by computing a vertex separator +*/ +/**************************************************************************/ +int METIS_ComputeVertexSeparator(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *options, idx_t *r_sepsize, idx_t *part) { - int i, j, tvwgt, tpwgts[2]; - GraphType graph; - CtrlType ctrl; - - SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3); - tvwgt = idxsum(*nvtxs, graph.vwgt); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = ONMETIS_CTYPE; - ctrl.IType = ONMETIS_ITYPE; - ctrl.RType = ONMETIS_RTYPE; - ctrl.dbglvl = ONMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - - ctrl.oflags = OFLAG_COMPRESS; /* For by-passing the pre-coarsening for multiple runs */ - ctrl.RType = 2; /* Standard 1-sided node refinement code */ - ctrl.pfactor = 0; - ctrl.nseps = 5; /* This should match NUM_INIT_MSECTIONS in ParMETISLib/defs.h */ - ctrl.optype = OP_ONMETIS; + idx_t i, j; + graph_t *graph; + ctrl_t *ctrl; - InitRandom(options[7]); + if ((ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL)) == NULL) + return METIS_ERROR_INPUT; - AllocateWorkSpace(&ctrl, &graph, 2); - - /*============================================================ - * Perform the bisection - *============================================================*/ - tpwgts[0] = tvwgt/2; - tpwgts[1] = tvwgt-tpwgts[0]; + InitRandom(ctrl->seed); - MlevelNodeBisectionMultiple(&ctrl, &graph, tpwgts, *ubfactor*.95); + graph = SetupGraph(ctrl, *nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL); - *sepsize = graph.pwgts[2]; - idxcopy(*nvtxs, graph.where, part); - - GKfree((void **)&graph.gdata, &graph.rdata, &graph.label, LTERM); - - - FreeWorkSpace(&ctrl, &graph); - -} - - -/************************************************************************* -* This function is the entry point for 
ONWMETIS. It requires weights on the -* vertices. It is for the case that the matrix has been pre-compressed. -**************************************************************************/ -void METIS_EdgeComputeSeparator(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *options, int *sepsize, idxtype *part) -{ - int i, j, tvwgt, tpwgts[2]; - GraphType graph; - CtrlType ctrl; - - SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3); - tvwgt = idxsum(*nvtxs, graph.vwgt); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = ONMETIS_CTYPE; - ctrl.IType = ONMETIS_ITYPE; - ctrl.RType = ONMETIS_RTYPE; - ctrl.dbglvl = ONMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - - ctrl.oflags = 0; - ctrl.pfactor = 0; - ctrl.nseps = 5; - ctrl.optype = OP_OEMETIS; - ctrl.CoarsenTo = amin(100, *nvtxs-1); - ctrl.maxvwgt = 1.5*tvwgt/ctrl.CoarsenTo; - - InitRandom(options[7]); - - AllocateWorkSpace(&ctrl, &graph, 2); + AllocateWorkSpace(ctrl, graph); /*============================================================ * Perform the bisection *============================================================*/ - tpwgts[0] = tvwgt/2; - tpwgts[1] = tvwgt-tpwgts[0]; - - MlevelEdgeBisection(&ctrl, &graph, tpwgts, 1.05); - ConstructMinCoverSeparator(&ctrl, &graph, 1.05); + ctrl->CoarsenTo = 100; - *sepsize = graph.pwgts[2]; - idxcopy(*nvtxs, graph.where, part); + MlevelNodeBisectionMultiple(ctrl, graph); - GKfree((void **)&graph.gdata, &graph.rdata, &graph.label, LTERM); + *r_sepsize = graph->pwgts[2]; + icopy(*nvtxs, graph->where, part); + FreeGraph(&graph); - FreeWorkSpace(&ctrl, &graph); + FreeCtrl(&ctrl); + return METIS_OK; } -/************************************************************************* -* This function is the entry point for PWMETIS that accepts exact weights -* for the target 
partitions -**************************************************************************/ -void METIS_mCPartGraphRecursive2(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, - idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, - float *tpwgts, int *options, int *edgecut, idxtype *part) -{ - int i, j; - GraphType graph; - CtrlType ctrl; - float *mytpwgts; - float avgwgt; - - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); - - SetUpGraph(&graph, OP_PMETIS, *nvtxs, *ncon, xadj, adjncy, vwgt, adjwgt, *wgtflag); - graph.npwgts = NULL; - mytpwgts = fmalloc(*nparts, "mytpwgts"); - scopy(*nparts, tpwgts, mytpwgts); - - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = McPMETIS_CTYPE; - ctrl.IType = McPMETIS_ITYPE; - ctrl.RType = McPMETIS_RTYPE; - ctrl.dbglvl = McPMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; - } - ctrl.optype = OP_PMETIS; - ctrl.CoarsenTo = 100; - - ctrl.nmaxvwgt = 1.5/(1.0*ctrl.CoarsenTo); - - InitRandom(options[7]); - - AllocateWorkSpace(&ctrl, &graph, *nparts); - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - ASSERT(CheckGraph(&graph)); - *edgecut = MCMlevelRecursiveBisection2(&ctrl, &graph, *nparts, mytpwgts, part, 1.000, 0); - -/* -{ -idxtype wgt[2048], minwgt, maxwgt, sumwgt; - -printf("nvtxs: %d, nparts: %d, ncon: %d\n", graph.nvtxs, *nparts, *ncon); -for (i=0; i<(*nparts)*(*ncon); i++) - wgt[i] = 0; -for (i=0; i wgt[maxwgt*(*ncon)+j]) ? 
i : maxwgt; - sumwgt += wgt[i*(*ncon)+j]; - } - avgwgt = (float)sumwgt / (float)*nparts; - printf("min: %5d, max: %5d, avg: %5.2f, balance: %6.3f\n", wgt[minwgt*(*ncon)+j], wgt[maxwgt*(*ncon)+j], avgwgt, (float)wgt[maxwgt*(*ncon)+j] / avgwgt); -} -printf("\n"); -} -*/ - - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); - - FreeWorkSpace(&ctrl, &graph); - GKfree((void **)&mytpwgts, LTERM); - - if (*numflag == 1) - Change2FNumbering(*nvtxs, xadj, adjncy, part); -} - - -/************************************************************************* -* This function takes a graph and produces a bisection of it -**************************************************************************/ -int MCMlevelRecursiveBisection2(CtrlType *ctrl, GraphType *graph, int nparts, - float *tpwgts, idxtype *part, float ubfactor, int fpart) -{ - int i, nvtxs, cut; - float wsum, tpwgts2[2]; - idxtype *label, *where; - GraphType lgraph, rgraph; - - nvtxs = graph->nvtxs; - if (nvtxs == 0) - return 0; - - /* Determine the weights of the partitions */ - tpwgts2[0] = ssum(nparts/2, tpwgts); - tpwgts2[1] = 1.0-tpwgts2[0]; - - MCMlevelEdgeBisection(ctrl, graph, tpwgts2, ubfactor); - cut = graph->mincut; - - label = graph->label; - where = graph->where; - for (i=0; i 2) - SplitGraphPart(ctrl, graph, &lgraph, &rgraph); - - /* Free the memory of the top level graph */ - GKfree((void **)&graph->gdata, &graph->nvwgt, &graph->rdata, &graph->label, &graph->npwgts, LTERM); - - /* Scale the fractions in the tpwgts according to the true weight */ - wsum = ssum(nparts/2, tpwgts); - sscale(nparts/2, 1.0/wsum, tpwgts); - sscale(nparts-nparts/2, 1.0/(1.0-wsum), tpwgts+nparts/2); - - /* Do the recursive call */ - if (nparts > 3) { - cut += MCMlevelRecursiveBisection2(ctrl, &lgraph, nparts/2, tpwgts, part, ubfactor, fpart); - cut += MCMlevelRecursiveBisection2(ctrl, &rgraph, nparts-nparts/2, tpwgts+nparts/2, part, ubfactor, fpart+nparts/2); - } - else if 
(nparts == 3) { - cut += MCMlevelRecursiveBisection2(ctrl, &rgraph, nparts-nparts/2, tpwgts+nparts/2, part, ubfactor, fpart+nparts/2); - GKfree((void **)&lgraph.gdata, &lgraph.nvwgt, &lgraph.label, LTERM); - } - - return cut; - -} - - - /*************************************************************************/ /*! This function is the entry point of a node-based separator refinement of the nodes with an hmarker[] of 0. */ /*************************************************************************/ -void METIS_NodeRefine(int nvtxs, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, - idxtype *adjwgt, idxtype *where, idxtype *hmarker, float ubfactor) +int METIS_NodeRefine(idx_t nvtxs, idx_t *xadj, idx_t *vwgt, idx_t *adjncy, + idx_t *where, idx_t *hmarker, real_t ubfactor) { - GraphType *graph; - CtrlType ctrl; + graph_t *graph; + ctrl_t *ctrl; - ctrl.dbglvl = ONMETIS_DBGLVL; - ctrl.optype = OP_ONMETIS; + /* set up the run time parameters */ + ctrl = SetupCtrl(METIS_OP_OMETIS, NULL, 1, 3, NULL, NULL); + if (!ctrl) return METIS_ERROR_INPUT; - graph = CreateGraph(); - SetUpGraph(graph, OP_ONMETIS, nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3); + /* set up the graph */ + graph = SetupGraph(ctrl, nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL); - AllocateWorkSpace(&ctrl, graph, 2); + /* allocate workspace memory */ + AllocateWorkSpace(ctrl, graph); - Allocate2WayNodePartitionMemory(&ctrl, graph); - idxcopy(nvtxs, where, graph->where); + /* set up the memory and the input partition */ + Allocate2WayNodePartitionMemory(ctrl, graph); + icopy(nvtxs, where, graph->where); - Compute2WayNodePartitionParams(&ctrl, graph); + Compute2WayNodePartitionParams(ctrl, graph); - FM_2WayNodeRefine_OneSidedP(&ctrl, graph, hmarker, ubfactor, 10); - /* FM_2WayNodeRefine_TwoSidedP(&ctrl, graph, hmarker, ubfactor, 10); */ + FM_2WayNodeRefine1SidedP(ctrl, graph, hmarker, ubfactor, 10); + /* FM_2WayNodeRefine2SidedP(ctrl, graph, hmarker, ubfactor, 10); */ - FreeWorkSpace(&ctrl, graph); + icopy(nvtxs, 
graph->where, where); - idxcopy(nvtxs, graph->where, where); - - FreeGraph(graph); + FreeGraph(&graph); + FreeCtrl(&ctrl); + return METIS_OK; } @@ -543,17 +235,19 @@ void METIS_NodeRefine(int nvtxs, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, /*! This function performs a node-based 1-sided FM refinement that moves only nodes whose hmarker[] == -1. It is used by Parmetis. */ /*************************************************************************/ -void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, - idxtype *hmarker, float ubfactor, int npasses) +void FM_2WayNodeRefine1SidedP(ctrl_t *ctrl, graph_t *graph, + idx_t *hmarker, real_t ubfactor, idx_t npasses) { - int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, nbad, qsize; - idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; - idxtype *mptr, *mind, *swaps, *perm, *inqueue; - PQueueType parts; - NRInfoType *rinfo; - int higain, oldgain, mincut, initcut, mincutorder; - int pass, from, to, limit; - int badmaxpwgt, mindiff, newdiff; + idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, nbad, qsize; + idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; + idx_t *mptr, *mind, *swaps, *inqueue; + rpq_t *queue; + nrinfo_t *rinfo; + idx_t higain, oldgain, mincut, initcut, mincutorder; + idx_t pass, from, to, limit; + idx_t badmaxpwgt, mindiff, newdiff; + + WCOREPUSH; ASSERT(graph->mincut == graph->pwgts[2]); @@ -568,43 +262,43 @@ void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, pwgts = graph->pwgts; rinfo = graph->nrinfo; - PQueueInit(ctrl, &parts, nvtxs, ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt)); + queue = rpqCreate(nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - mptr = idxwspacemalloc(ctrl, nvtxs+1); - mind = idxwspacemalloc(ctrl, nvtxs); - inqueue = idxwspacemalloc(ctrl, nvtxs); + inqueue = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + swaps = iwspacemalloc(ctrl, nvtxs); + mptr = 
iwspacemalloc(ctrl, nvtxs+1); + mind = iwspacemalloc(ctrl, 2*nvtxs); - idxset(nvtxs, -1, inqueue); + badmaxpwgt = (idx_t)(ubfactor*gk_max(pwgts[0], pwgts[1])); - badmaxpwgt = (int)(ubfactor*amax(pwgts[0], pwgts[1])); - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions-N1: [%6d %6d] Nv-Nb[%6d %6d] MaxPwgt[%6d]. ISep: %6d\n", - pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, badmaxpwgt, graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions-N1: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"] " + "MaxPwgt[%6"PRIDX"]. ISep: %6"PRIDX"\n", + pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, badmaxpwgt, + graph->mincut)); to = (pwgts[0] < pwgts[1] ? 1 : 0); for (pass=0; passmincut; nbnd = graph->nbnd; - RandomPermute(nbnd, perm, 1); + /* use the swaps array in place of the traditional perm array to save memory */ + irandArrayPermute(nbnd, swaps, nbnd, 1); for (ii=0; ii= 2*nvtxs-1) + break; + + inqueue[higain] = -1; + if (pwgts[to]+vwgt[higain] > badmaxpwgt) { /* Skip this vertex */ if (nbad++ > limit) break; @@ -664,7 +363,7 @@ void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, rinfo[k].edegrees[to] += vwgt[higain]; } else if (where[k] == from) { /* This vertex is pulled into the separator */ - ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k])); + ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k])); BNDInsert(nbnd, bndind, bndptr, k); mind[nmind++] = k; /* Keep track for rollback */ @@ -681,15 +380,15 @@ void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, oldgain = vwgt[kk]-rinfo[kk].edegrees[from]; rinfo[kk].edegrees[from] -= vwgt[k]; - /* Update the gain of this node if it was skipped */ + /* Update the gain of this node if it was not skipped */ if (inqueue[kk] == pass) - PQueueUpdateUp(&parts, kk, oldgain, oldgain+vwgt[k]); + rpqUpdate(queue, kk, oldgain+vwgt[k]); } } /* Insert the new vertex into the priority queue. 
Safe due to one-sided moves */ if (hmarker[k] == -1 || hmarker[k] == to) { - PQueueInsert(&parts, k, vwgt[k]-edegrees[from]); + rpqInsert(queue, k, vwgt[k]-edegrees[from]); inqueue[k] = pass; } } @@ -697,9 +396,10 @@ void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, mptr[nswaps+1] = nmind; - IFSET(ctrl->dbglvl, DBG_MOVEINFO, - printf("Moved %6d to %3d, Gain: %5d [%5d] \t[%5d %5d %5d] [%3d %2d]\n", - higain, to, (vwgt[higain]-rinfo[higain].edegrees[from]), vwgt[higain], pwgts[0], pwgts[1], pwgts[2], nswaps, limit)); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"] [%3"PRIDX" %2"PRIDX"]\n", + higain, to, (vwgt[higain]-rinfo[higain].edegrees[from]), + vwgt[higain], pwgts[0], pwgts[1], pwgts[2], nswaps, limit)); } @@ -744,8 +444,8 @@ void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, ASSERT(mincut == pwgts[2]); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\tMinimum sep: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d, QSIZE: %6d\n", + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX", QSIZE: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd, qsize)); graph->mincut = mincut; @@ -755,13 +455,9 @@ void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, break; } - PQueueFree(ctrl, &parts); + rpqDestroy(queue); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs+1); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); + WCOREPOP; } @@ -769,18 +465,20 @@ void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, /*! This function performs a node-based (two-sided) FM refinement that moves only nodes whose hmarker[] == -1. It is used by Parmetis. 
*/ /*************************************************************************/ -void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, - idxtype *hmarker, float ubfactor, int npasses) +void FM_2WayNodeRefine2SidedP(ctrl_t *ctrl, graph_t *graph, + idx_t *hmarker, real_t ubfactor, idx_t npasses) { - int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; - idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; - idxtype *mptr, *mind, *moved, *swaps, *perm; - PQueueType parts[2]; - NRInfoType *rinfo; - int higain, oldgain, mincut, initcut, mincutorder; - int pass, to, other, limit; - int badmaxpwgt, mindiff, newdiff; - int u[2], g[2]; + idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; + idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; + idx_t *mptr, *mind, *moved, *swaps; + rpq_t *queues[2]; + nrinfo_t *rinfo; + idx_t higain, oldgain, mincut, initcut, mincutorder; + idx_t pass, to, other, limit; + idx_t badmaxpwgt, mindiff, newdiff; + idx_t u[2], g[2]; + + WCOREPUSH; nvtxs = graph->nvtxs; xadj = graph->xadj; @@ -793,42 +491,40 @@ void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, pwgts = graph->pwgts; rinfo = graph->nrinfo; + queues[0] = rpqCreate(nvtxs); + queues[1] = rpqCreate(nvtxs); - i = ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt); - PQueueInit(ctrl, &parts[0], nvtxs, i); - PQueueInit(ctrl, &parts[1], nvtxs, i); + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + mptr = iwspacemalloc(ctrl, nvtxs+1); + mind = iwspacemalloc(ctrl, 2*nvtxs); - moved = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - mptr = idxwspacemalloc(ctrl, nvtxs+1); - mind = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. 
ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d] Nv-Nb[%6d %6d]. ISep: %6d\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); - - badmaxpwgt = (int)(ubfactor*amax(pwgts[0], pwgts[1])); + badmaxpwgt = (idx_t)(ubfactor*gk_max(pwgts[0], pwgts[1])); for (pass=0; passmincut; nbnd = graph->nbnd; - RandomPermute(nbnd, perm, 1); + /* use the swaps array in place of the traditional perm array to save memory */ + irandArrayPermute(nbnd, swaps, nbnd, 1); for (ii=0; ii= 2*nvtxs-1) + break; + pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); newdiff = abs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); @@ -909,10 +610,10 @@ void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, oldgain = vwgt[k]-rinfo[k].edegrees[to]; rinfo[k].edegrees[to] += vwgt[higain]; if (moved[k] == -5 || moved[k] == -(10+other)) - PQueueUpdate(&parts[other], k, oldgain, oldgain-vwgt[higain]); + rpqUpdate(queues[other], k, oldgain-vwgt[higain]); } else if (where[k] == other) { /* This vertex is pulled into the separator */ - ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k])); + ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k])); BNDInsert(nbnd, bndind, bndptr, k); mind[nmind++] = k; /* Keep track for rollback */ @@ -929,24 +630,24 @@ void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, oldgain = vwgt[kk]-rinfo[kk].edegrees[other]; rinfo[kk].edegrees[other] -= vwgt[k]; if (moved[kk] == -5 || moved[kk] == -(10+to)) - PQueueUpdate(&parts[to], kk, oldgain, oldgain+vwgt[k]); + rpqUpdate(queues[to], kk, oldgain+vwgt[k]); } } /* Insert the new vertex into the priority queue (if it has not been moved). 
*/ if (moved[k] == -1 && (hmarker[k] == -1 || hmarker[k] == to)) { - PQueueInsert(&parts[to], k, vwgt[k]-edegrees[other]); + rpqInsert(queues[to], k, vwgt[k]-edegrees[other]); moved[k] = -(10+to); } #ifdef FULLMOVES /* this does not work as well as the above partial one */ if (moved[k] == -1) { if (hmarker[k] == -1) { - PQueueInsert(&parts[0], k, vwgt[k]-edegrees[1]); - PQueueInsert(&parts[1], k, vwgt[k]-edegrees[0]); + rpqInsert(queues[0], k, vwgt[k]-edegrees[1]); + rpqInsert(queues[1], k, vwgt[k]-edegrees[0]); moved[k] = -5; } else if (hmarker[k] != 2) { - PQueueInsert(&parts[hmarker[k]], k, vwgt[k]-edegrees[(hmarker[k]+1)%2]); + rpqInsert(queues[hmarker[k]], k, vwgt[k]-edegrees[(hmarker[k]+1)%2]); moved[k] = -(10+hmarker[k]); } } @@ -955,8 +656,11 @@ void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, } mptr[nswaps+1] = nmind; - IFSET(ctrl->dbglvl, DBG_MOVEINFO, - printf("Moved %6d to %3d, Gain: %5d [%5d] [%4d %4d] \t[%5d %5d %5d]\n", higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], pwgts[0], pwgts[1], pwgts[2])); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] " + "[%4"PRIDX" %4"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", + higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], + pwgts[0], pwgts[1], pwgts[2])); } @@ -1002,8 +706,8 @@ void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, ASSERT(mincut == pwgts[2]); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\tMinimum sep: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); graph->mincut = mincut; graph->nbnd = nbnd; @@ -1012,12 +716,102 @@ void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, break; } - PQueueFree(ctrl, &parts[0]); - PQueueFree(ctrl, &parts[1]); + 
rpqDestroy(queues[0]); + rpqDestroy(queues[1]); - idxwspacefree(ctrl, nvtxs+1); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); + WCOREPOP; } + + +/*************************************************************************/ +/*! This function computes a cache-friendly permutation of each partition. + The resulting permutation is retuned in old2new, which is a vector of + size nvtxs such for vertex i, old2new[i] is its new vertex number. +*/ +/**************************************************************************/ +int METIS_CacheFriendlyReordering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *part, idx_t *old2new) +{ + idx_t i, j, k, first, last, lastlevel, maxdegree, nparts; + idx_t *cot, *pos, *pwgts; + ikv_t *levels; + + InitRandom(123); + + /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes. + Positions from [0...first) is the current iperm[] vector of the explored vertices; + Positions from [first...last) is the OPEN list (i.e., visited vertices); + Positions from [last...nvtxs) is the todo list. */ + cot = iincset(nvtxs, 0, imalloc(nvtxs, "METIS_CacheFriendlyReordering: cor")); + + /* This array will function like pos + touched of the CC method */ + pos = iincset(nvtxs, 0, imalloc(nvtxs, "METIS_CacheFriendlyReordering: pos")); + + /* pick a random starting vertex */ + i = irandInRange(nvtxs); + pos[0] = cot[0] = i; + pos[i] = cot[i] = 0; + + /* compute a BFS ordering */ + first = last = 0; + lastlevel = 0; + maxdegree = 0; + while (first < nvtxs) { + if (first == last) { /* Find another starting vertex */ + k = cot[last]; + ASSERT(pos[k] >= 0); + pos[k] = --lastlevel; /* mark node as being visited by assigning its current level (-ve) */ + last++; + } + + i = cot[first++]; + maxdegree = (maxdegree < xadj[i+1]-xadj[i] ? 
xadj[i+1]-xadj[i] : maxdegree); + for (j=xadj[i]; j= 0) { + /* pos[k] is the location within cot of where k resides (it is in the 'todo' part); + put in that location cot[last] that we are about to overwrite + and update pos[cot[last]] to reflect that. */ + cot[pos[k]] = cot[last]; + pos[cot[last]] = pos[k]; + + cot[last++] = k; /* put node at the end of the "queue" */ + pos[k] = pos[i]-1; /* mark node as being visited by assigning to next level */ + lastlevel = pos[k]; /* for correctly advancing the levels in case of disconnected graphs */ + } + } + } +// printf("lastlevel: %d\n", (int)-lastlevel); + + /* sort based on decreasing level and decreasing degree (RCM) */ + levels = ikvmalloc(nvtxs, "METIS_CacheFriendlyReordering: levels"); + maxdegree++; + for (i=0; i - -#ifndef _MSC_VER -#define __cdecl -#endif - - -/************************************************************************* -* Data-structures -**************************************************************************/ -/* Undefine the following #define in order to use short int as the idxtype */ -#define IDXTYPE_INT - -/* Indexes are as long as integers for now */ -#ifdef IDXTYPE_INT -typedef int idxtype; -#else -typedef short idxtype; -#endif - - -/************************************************************************* -* Constants -**************************************************************************/ -#define PARMETIS_MAJOR_VERSION 3 -#define PARMETIS_MINOR_VERSION 2 -#define PARMETIS_SUBMINOR_VERSION 0 - - -/************************************************************************* -* Function prototypes -**************************************************************************/ -#ifdef __cplusplus -extern "C" { -#endif - -/*------------------------------------------------------------------- -* API Introduced with Release 3.0 (current API) -*--------------------------------------------------------------------*/ -void __cdecl ParMETIS_V3_AdaptiveRepart( - idxtype *vtxdist, idxtype *xadj, 
idxtype *adjncy, idxtype *vwgt, - idxtype *vsize, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, - int *nparts, float *tpwgts, float *ubvec, float *ipc2redist, - int *options, int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_PartGeomKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, float *xyz, - int *ncon, int *nparts, float *tpwgts, float *ubvec, int *options, - int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_PartGeom( - idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_PartKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, int *nparts, - float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, - MPI_Comm *comm); - -void __cdecl ParMETIS_V3_Mesh2Dual( - idxtype *elmdist, idxtype *eptr, idxtype *eind, int *numflag, - int *ncommonnodes, idxtype **xadj, idxtype **adjncy, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_PartMeshKway( - idxtype *elmdist, idxtype *eptr, idxtype *eind, idxtype *elmwgt, - int *wgtflag, int *numflag, int *ncon, int *ncommonnodes, int *nparts, - float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, - MPI_Comm *comm); - -void __cdecl ParMETIS_V3_NodeND( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, - int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_RefineKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, int *nparts, - float *tpwgts, float *ubvec, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_V32_NodeND( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *numflag, int *mtype, int *rtype, int *p_nseps, int *s_nseps, - float *ubfrac, int 
*seed, int *dbglvl, idxtype *order, - idxtype *sizes, MPI_Comm *comm); - - - -/*------------------------------------------------------------------ -* Backward compatibility routines with Release 2.0 -*-------------------------------------------------------------------*/ -void __cdecl ParMETIS_PartKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, - int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_PartGeomKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *ndims, float *xyz, int *nparts, int *options, - int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_PartGeom( - idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_PartGeomRefine( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, float *xyz, - int *options, int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_RefineKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_RepartLDiffusion( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_RepartGDiffusion( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_RepartRemap( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, - MPI_Comm *comm); - -void __cdecl 
ParMETIS_RepartMLRemap( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, - MPI_Comm *comm); - -void __cdecl ParMETIS_NodeND( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, - idxtype *order, idxtype *sizes, MPI_Comm *comm); - -void __cdecl ParMETIS_SerialNodeND( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, - idxtype *order, idxtype *sizes, MPI_Comm *comm); - - - - -/*------------------------------------------------------------------- -* Backward compatibility routines with Release 1.0 -*--------------------------------------------------------------------*/ -void __cdecl PARKMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARGKMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - int ndims, float *xyz, idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARGRMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - int ndims, float *xyz, idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARGMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int ndims, float *xyz, - idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARRMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, - idxtype *adjwgt, idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARUAMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, - idxtype *adjwgt, idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARDAMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - idxtype *part, int *options, MPI_Comm comm); - -#ifdef __cplusplus -} -#endif - - -/************************************************************************* -* 
Various constants used for the different parameters -**************************************************************************/ -/* Matching types */ -#define PARMETIS_MTYPE_LOCAL 1 /* Restrict matching to within processor vertices */ -#define PARMETIS_MTYPE_GLOBAL 2 /* Remote vertices can be matched */ - -/* Separator refinement types */ -#define PARMETIS_SRTYPE_GREEDY 1 /* Vertices are visted from highest to lowest gain */ -#define PARMETIS_SRTYPE_2PHASE 2 /* Separators are refined in a two-phase fashion using - PARMETIS_SRTYPE_GREEDY for the 2nd phase */ - -/* Coupling types for ParMETIS_V3_RefineKway & ParMETIS_V3_AdaptiveRepart */ -#define PARMETIS_PSR_COUPLED 1 /* # of partitions == # of processors */ -#define PARMETIS_PSR_UNCOUPLED 2 /* # of partitions != # of processors */ - - -/* Debug levels (fields should be ORed) */ -#define PARMETIS_DBGLVL_TIME 1 /* Perform timing analysis */ -#define PARMETIS_DBGLVL_INFO 2 /* Perform timing analysis */ -#define PARMETIS_DBGLVL_PROGRESS 4 /* Show the coarsening progress */ -#define PARMETIS_DBGLVL_REFINEINFO 8 /* Show info on communication during folding */ -#define PARMETIS_DBGLVL_MATCHINFO 16 /* Show info on matching */ -#define PARMETIS_DBGLVL_RMOVEINFO 32 /* Show info on communication during folding */ -#define PARMETIS_DBGLVL_REMAP 64 /* Determines if remapping will take place */ - -#endif diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/pmetis.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/pmetis.c index 5380dde2..9174aa37 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/pmetis.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/pmetis.c @@ -1,219 +1,309 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * pmetis.c - * - * This file contains the top level routines for the multilevel recursive - * bisection algorithm PMETIS. 
- * - * Started 7/24/97 - * George - * - * $Id: pmetis.c,v 1.1 2003/07/16 15:55:16 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function is the entry point for PMETIS -**************************************************************************/ -void METIS_PartGraphRecursive(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, - int *options, int *edgecut, idxtype *part) -{ - int i; - float *tpwgts; +/** +\file +\brief This file contains the top level routines for the multilevel recursive bisection + algorithm PMETIS. + +\date Started 7/24/1997 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: pmetis.c 10513 2011-07-07 22:06:03Z karypis $ \endverbatim +*/ - tpwgts = fmalloc(*nparts, "KMETIS: tpwgts"); - for (i=0; i<*nparts; i++) - tpwgts[i] = 1.0/(1.0*(*nparts)); - METIS_WPartGraphRecursive(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, - tpwgts, options, edgecut, part); - GKfree((void **)&tpwgts, LTERM); -} +#include "metislib.h" + + +/*************************************************************************/ +/*! \ingroup api + \brief Recursive partitioning routine. + + This function computes a partitioning of a graph based on multilevel + recursive bisection. It can be used to partition a graph into \e k + parts. The objective of the partitioning is to minimize the edgecut + subject to one or more balancing constraints. + + \param[in] nvtxs is the number of vertices in the graph. + + \param[in] ncon is the number of balancing constraints. For the standard + partitioning problem in which each vertex is either unweighted + or has a single weight, ncon should be 1. + + \param[in] xadj is an array of size nvtxs+1 used to specify the starting + positions of the adjacency structure of the vertices in the + adjncy array. 
+ + \param[in] adjncy is an array of size to the sum of the degrees of the + graph that stores for each vertex the set of vertices that + is adjacent to. + + \param[in] vwgt is an array of size nvtxs*ncon that stores the weights + of the vertices for each constraint. The ncon weights for the + ith vertex are stored in the ncon consecutive locations starting + at vwgt[i*ncon]. When ncon==1, a NULL value can be passed indicating + that all the vertices in the graph have the same weight. + + \param[in] adjwgt is an array of size equal to adjncy, specifying the weight + for each edge (i.e., adjwgt[j] corresponds to the weight of the + edge stored in adjncy[j]). + A NULL value can be passed indicating that all the edges in the + graph have the same weight. + + \param[in] nparts is the number of desired partitions. + + \param[in] tpwgts is an array of size nparts*ncon that specifies the + desired weight for each part and constraint. The \e{target partition + weight} for the ith part and jth constraint is specified + at tpwgts[i*ncon+j] (the numbering of i and j starts from 0). + For each constraint, the sum of the tpwgts[] entries must be + 1.0 (i.e., \f$ \sum_i tpwgts[i*ncon+j] = 1.0 \f$). + A NULL value can be passed indicating that the graph should + be equally divided among the parts. + + \param[in] ubvec is an array of size ncon that specifies the allowed + load imbalance tolerance for each constraint. + For the ith part and jth constraint the allowed weight is the + ubvec[j]*tpwgts[i*ncon+j] fraction of the jth's constraint total + weight. The load imbalances must be greater than 1.0. + A NULL value can be passed indicating that the load imbalance + tolerance for each constraint should be 1.001 (for ncon==1) + or 1.01 (for ncon>1). + \params[in] options is the array for passing additional parameters + in order to customize the behaviour of the partitioning + algorithm. + \params[out] edgecut stores the cut of the partitioning. 
-/************************************************************************* -* This function is the entry point for PWMETIS that accepts exact weights -* for the target partitions -**************************************************************************/ -void METIS_WPartGraphRecursive(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, - float *tpwgts, int *options, int *edgecut, idxtype *part) + \params[out] part is an array of size nvtxs used to store the + computed partitioning. The partition number for the ith + vertex is stored in part[i]. Based on the numflag parameter, + the numbering of the parts starts from either 0 or 1. + + + \returns + \retval METIS_OK indicates that the function returned normally. + \retval METIS_ERROR_INPUT indicates an input error. + \retval METIS_ERROR_MEMORY indicates that it could not allocate + the required memory. + +*/ +/*************************************************************************/ +int METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, + idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *objval, idx_t *part) { - int i, j; - GraphType graph; - CtrlType ctrl; - float *mytpwgts; + int sigrval=0, renumber=0; + graph_t *graph; + ctrl_t *ctrl; - if (*numflag == 1) - Change2CNumbering(*nvtxs, xadj, adjncy); + /* set up malloc cleaning code and signal catchers */ + if (!gk_malloc_init()) + return METIS_ERROR_MEMORY; - SetUpGraph(&graph, OP_PMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, *wgtflag); + gk_sigtrap(); - if (options[0] == 0) { /* Use the default parameters */ - ctrl.CType = PMETIS_CTYPE; - ctrl.IType = PMETIS_ITYPE; - ctrl.RType = PMETIS_RTYPE; - ctrl.dbglvl = PMETIS_DBGLVL; - } - else { - ctrl.CType = options[OPTION_CTYPE]; - ctrl.IType = options[OPTION_ITYPE]; - ctrl.RType = options[OPTION_RTYPE]; - ctrl.dbglvl = options[OPTION_DBGLVL]; + if 
((sigrval = gk_sigcatch()) != 0) + goto SIGTHROW; + + /* set up the run parameters */ + ctrl = SetupCtrl(METIS_OP_PMETIS, options, *ncon, *nparts, tpwgts, ubvec); + if (!ctrl) { + gk_siguntrap(); + return METIS_ERROR_INPUT; } - ctrl.optype = OP_PMETIS; - ctrl.CoarsenTo = 20; - ctrl.maxvwgt = 1.5*(idxsum(*nvtxs, graph.vwgt)/ctrl.CoarsenTo); - mytpwgts = fmalloc(*nparts, "PWMETIS: mytpwgts"); - for (i=0; i<*nparts; i++) - mytpwgts[i] = tpwgts[i]; + /* if required, change the numbering to 0 */ + if (ctrl->numflag == 1) { + Change2CNumbering(*nvtxs, xadj, adjncy); + renumber = 1; + } - InitRandom(-1); + /* set up the graph */ + graph = SetupGraph(ctrl, *nvtxs, *ncon, xadj, adjncy, vwgt, vsize, adjwgt); - AllocateWorkSpace(&ctrl, &graph, *nparts); + /* allocate workspace memory */ + AllocateWorkSpace(ctrl, graph); - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); + /* start the partitioning */ + IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr)); - *edgecut = MlevelRecursiveBisection(&ctrl, &graph, *nparts, part, mytpwgts, 1.000, 0); + iset(*nvtxs, 0, part); + *objval = (*nparts == 1 ? 
0 : MlevelRecursiveBisection(ctrl, graph, *nparts, part, ctrl->tpwgts, 0)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl)); - FreeWorkSpace(&ctrl, &graph); - GKfree((void **)&mytpwgts, LTERM); + /* clean up */ + FreeCtrl(&ctrl); - if (*numflag == 1) +SIGTHROW: + /* if required, change the numbering back to 1 */ + if (renumber) Change2FNumbering(*nvtxs, xadj, adjncy, part); -} + gk_siguntrap(); + gk_malloc_cleanup(0); + + return metis_rcode(sigrval); +} -/************************************************************************* -* This function takes a graph and produces a bisection of it -**************************************************************************/ -int MlevelRecursiveBisection(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, float *tpwgts, float ubfactor, int fpart) +/*************************************************************************/ +/*! This function is the top-level driver of the recursive bisection + routine. 
*/ +/*************************************************************************/ +idx_t MlevelRecursiveBisection(ctrl_t *ctrl, graph_t *graph, idx_t nparts, + idx_t *part, real_t *tpwgts, idx_t fpart) { - int i, j, nvtxs, cut, tvwgt, tpwgts2[2]; - idxtype *label, *where; - GraphType lgraph, rgraph; - float wsum; - - nvtxs = graph->nvtxs; - if (nvtxs == 0) { - printf("\t***Cannot bisect a graph with 0 vertices!\n\t***You are trying to partition a graph into too many parts!\n"); + idx_t i, j, nvtxs, ncon, objval; + idx_t *label, *where; + graph_t *lgraph, *rgraph; + real_t wsum, *tpwgts2; + + if ((nvtxs = graph->nvtxs) == 0) { + printf("\t***Cannot bisect a graph with 0 vertices!\n" + "\t***You are trying to partition a graph into too many parts!\n"); return 0; } - /* Determine the weights of the partitions */ - tvwgt = idxsum(nvtxs, graph->vwgt); - tpwgts2[0] = tvwgt*ssum(nparts/2, tpwgts); - tpwgts2[1] = tvwgt-tpwgts2[0]; + ncon = graph->ncon; + + /* determine the weights of the two partitions as a function of the weight of the + target partition weights */ + WCOREPUSH; + tpwgts2 = rwspacemalloc(ctrl, 2*ncon); + for (i=0; i>1), tpwgts+i, ncon); + tpwgts2[ncon+i] = 1.0 - tpwgts2[i]; + } - MlevelEdgeBisection(ctrl, graph, tpwgts2, ubfactor); - cut = graph->mincut; + /* perform the bisection */ + objval = MultilevelBisect(ctrl, graph, tpwgts2); - /* printf("%5d %5d %5d [%5d %f]\n", tpwgts2[0], tpwgts2[1], cut, tvwgt, ssum(nparts/2, tpwgts));*/ + WCOREPOP; label = graph->label; where = graph->where; for (i=0; i 2) { + if (nparts > 2) SplitGraphPart(ctrl, graph, &lgraph, &rgraph); - /* printf("%d %d\n", lgraph.nvtxs, rgraph.nvtxs); */ - } - /* Free the memory of the top level graph */ - GKfree((void **)&graph->gdata, &graph->rdata, &graph->label, LTERM); + FreeGraph(&graph); /* Scale the fractions in the tpwgts according to the true weight */ - wsum = ssum(nparts/2, tpwgts); - sscale(nparts/2, 1.0/wsum, tpwgts); - sscale(nparts-nparts/2, 1.0/(1.0-wsum), tpwgts+nparts/2); 
- /* - for (i=0; i>1), tpwgts+i, ncon); + rscale((nparts>>1), 1.0/wsum, tpwgts+i, ncon); + rscale(nparts-(nparts>>1), 1.0/(1.0-wsum), tpwgts+(nparts>>1)*ncon+i, ncon); + } /* Do the recursive call */ if (nparts > 3) { - cut += MlevelRecursiveBisection(ctrl, &lgraph, nparts/2, part, tpwgts, ubfactor, fpart); - cut += MlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, tpwgts+nparts/2, ubfactor, fpart+nparts/2); + objval += MlevelRecursiveBisection(ctrl, lgraph, (nparts>>1), part, + tpwgts, fpart); + objval += MlevelRecursiveBisection(ctrl, rgraph, nparts-(nparts>>1), part, + tpwgts+(nparts>>1)*ncon, fpart+(nparts>>1)); } else if (nparts == 3) { - cut += MlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, tpwgts+nparts/2, ubfactor, fpart+nparts/2); - GKfree((void **)&lgraph.gdata, &lgraph.label, LTERM); + FreeGraph(&lgraph); + objval += MlevelRecursiveBisection(ctrl, rgraph, nparts-(nparts>>1), part, + tpwgts+(nparts>>1)*ncon, fpart+(nparts>>1)); } - return cut; + return objval; } -/************************************************************************* -* This function performs multilevel bisection -**************************************************************************/ -void MlevelEdgeBisection(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor) +/*************************************************************************/ +/*! 
This function performs a multilevel bisection */ +/*************************************************************************/ +idx_t MultilevelBisect(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts) { - GraphType *cgraph; + idx_t i, niparts, bestobj=0, curobj=0, *bestwhere=NULL; + graph_t *cgraph; + real_t bestbal=0.0, curbal=0.0; - cgraph = Coarsen2Way(ctrl, graph); + Setup2WayBalMultipliers(ctrl, graph, tpwgts); - Init2WayPartition(ctrl, cgraph, tpwgts, ubfactor); + WCOREPUSH; - Refine2Way(ctrl, graph, cgraph, tpwgts, ubfactor); + if (ctrl->ncuts > 1) + bestwhere = iwspacemalloc(ctrl, graph->nvtxs); -/* - IsConnectedSubdomain(ctrl, graph, 0); - IsConnectedSubdomain(ctrl, graph, 1); -*/ -} + for (i=0; incuts; i++) { + cgraph = CoarsenGraph(ctrl, graph); + niparts = (cgraph->nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS); + Init2WayPartition(ctrl, cgraph, tpwgts, niparts); + Refine2Way(ctrl, graph, cgraph, tpwgts); + curobj = graph->mincut; + curbal = ComputeLoadImbalanceDiff(graph, 2, ctrl->pijbm, ctrl->ubfactors); -/************************************************************************* -* This function takes a graph and a bisection and splits it into two graphs. 
-**************************************************************************/ -void SplitGraphPart(CtrlType *ctrl, GraphType *graph, GraphType *lgraph, GraphType *rgraph) -{ - int i, j, k, kk, l, istart, iend, mypart, nvtxs, ncon, snvtxs[2], snedges[2], sum; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *adjwgtsum, *label, *where, *bndptr; - idxtype *sxadj[2], *svwgt[2], *sadjncy[2], *sadjwgt[2], *sadjwgtsum[2], *slabel[2]; - idxtype *rename; - idxtype *auxadjncy, *auxadjwgt; - float *nvwgt, *snvwgt[2], *npwgts; + if (i == 0 + || (curbal <= 0.0005 && bestobj > curobj) + || (bestbal > 0.0005 && curbal < bestbal)) { + bestobj = curobj; + bestbal = curbal; + if (i < ctrl->ncuts-1) + icopy(graph->nvtxs, graph->where, bestwhere); + } + if (bestobj == 0) + break; - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SplitTmr)); + if (i < ctrl->ncuts-1) + FreeRData(graph); + } - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - vwgt = graph->vwgt; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; - label = graph->label; - where = graph->where; - bndptr = graph->bndptr; - npwgts = graph->npwgts; + if (bestobj != curobj) { + icopy(graph->nvtxs, bestwhere, graph->where); + Compute2WayPartitionParams(ctrl, graph); + } + + WCOREPOP; + + return bestobj; +} + + +/*************************************************************************/ +/*! 
This function splits a graph into two based on its bisection */ +/*************************************************************************/ +void SplitGraphPart(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, + graph_t **r_rgraph) +{ + idx_t i, j, k, l, istart, iend, mypart, nvtxs, ncon, snvtxs[2], snedges[2]; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label, *where, *bndptr; + idx_t *sxadj[2], *svwgt[2], *sadjncy[2], *sadjwgt[2], *slabel[2]; + idx_t *rename; + idx_t *auxadjncy, *auxadjwgt; + graph_t *lgraph, *rgraph; + + WCOREPUSH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->SplitTmr)); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + label = graph->label; + where = graph->where; + bndptr = graph->bndptr; ASSERT(bndptr != NULL); - rename = idxwspacemalloc(ctrl, nvtxs); + rename = iwspacemalloc(ctrl, nvtxs); snvtxs[0] = snvtxs[1] = snedges[0] = snedges[1] = 0; for (i=0; ixadj; - svwgt[0] = lgraph->vwgt; - snvwgt[0] = lgraph->nvwgt; - sadjwgtsum[0] = lgraph->adjwgtsum; - sadjncy[0] = lgraph->adjncy; - sadjwgt[0] = lgraph->adjwgt; - slabel[0] = lgraph->label; - - SetUpSplitGraph(graph, rgraph, snvtxs[1], snedges[1]); - sxadj[1] = rgraph->xadj; - svwgt[1] = rgraph->vwgt; - snvwgt[1] = rgraph->nvwgt; - sadjwgtsum[1] = rgraph->adjwgtsum; - sadjncy[1] = rgraph->adjncy; - sadjwgt[1] = rgraph->adjwgt; - slabel[1] = rgraph->label; + lgraph = SetupSplitGraph(graph, snvtxs[0], snedges[0]); + sxadj[0] = lgraph->xadj; + svwgt[0] = lgraph->vwgt; + sadjncy[0] = lgraph->adjncy; + sadjwgt[0] = lgraph->adjwgt; + slabel[0] = lgraph->label; + + rgraph = SetupSplitGraph(graph, snvtxs[1], snedges[1]); + sxadj[1] = rgraph->xadj; + svwgt[1] = rgraph->vwgt; + sadjncy[1] = rgraph->adjncy; + sadjwgt[1] = rgraph->adjwgt; + slabel[1] = rgraph->label; snvtxs[0] = snvtxs[1] = snedges[0] = snedges[1] = 0; sxadj[0][0] = sxadj[1][0] = 0; for (i=0; inedges = snedges[0]; rgraph->nedges 
= snedges[1]; - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SplitTmr)); - - idxwspacefree(ctrl, nvtxs); -} + SetupGraph_tvwgt(lgraph); + SetupGraph_tvwgt(rgraph); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->SplitTmr)); -/************************************************************************* -* Setup the various arrays for the splitted graph -**************************************************************************/ -void SetUpSplitGraph(GraphType *graph, GraphType *sgraph, int snvtxs, int snedges) -{ - InitGraph(sgraph); - sgraph->nvtxs = snvtxs; - sgraph->nedges = snedges; - sgraph->ncon = graph->ncon; - - /* Allocate memory for the splitted graph */ - if (graph->ncon == 1) { - sgraph->gdata = idxmalloc(4*snvtxs+1 + 2*snedges, "SetUpSplitGraph: gdata"); - - sgraph->xadj = sgraph->gdata; - sgraph->vwgt = sgraph->gdata + snvtxs+1; - sgraph->adjwgtsum = sgraph->gdata + 2*snvtxs+1; - sgraph->cmap = sgraph->gdata + 3*snvtxs+1; - sgraph->adjncy = sgraph->gdata + 4*snvtxs+1; - sgraph->adjwgt = sgraph->gdata + 4*snvtxs+1 + snedges; - } - else { - sgraph->gdata = idxmalloc(3*snvtxs+1 + 2*snedges, "SetUpSplitGraph: gdata"); - - sgraph->xadj = sgraph->gdata; - sgraph->adjwgtsum = sgraph->gdata + snvtxs+1; - sgraph->cmap = sgraph->gdata + 2*snvtxs+1; - sgraph->adjncy = sgraph->gdata + 3*snvtxs+1; - sgraph->adjwgt = sgraph->gdata + 3*snvtxs+1 + snedges; - - sgraph->nvwgt = fmalloc(graph->ncon*snvtxs, "SetUpSplitGraph: nvwgt"); - } + *r_lgraph = lgraph; + *r_rgraph = rgraph; - sgraph->label = idxmalloc(snvtxs, "SetUpSplitGraph: sgraph->label"); + WCOREPOP; } diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/pqueue.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/pqueue.c deleted file mode 100644 index b416ba9a..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/pqueue.c +++ /dev/null @@ -1,579 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * pqueue.c - * - * This 
file contains functions for manipulating the bucket list - * representation of the gains associated with each vertex in a graph. - * These functions are used by the refinement algorithms - * - * Started 9/2/94 - * George - * - * $Id: pqueue.c,v 1.1 2003/07/16 15:55:16 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function initializes the data structures of the priority queue -**************************************************************************/ -void PQueueInit(CtrlType *ctrl, PQueueType *queue, int maxnodes, int maxgain) -{ - int i, j, ncore; - - queue->nnodes = 0; - queue->maxnodes = maxnodes; - - queue->buckets = NULL; - queue->nodes = NULL; - queue->heap = NULL; - queue->locator = NULL; - - if (maxgain > PLUS_GAINSPAN || maxnodes < 500) - queue->type = 2; - else - queue->type = 1; - - if (queue->type == 1) { - queue->pgainspan = amin(PLUS_GAINSPAN, maxgain); - queue->ngainspan = amin(NEG_GAINSPAN, maxgain); - - j = queue->ngainspan+queue->pgainspan+1; - - ncore = 2 + (sizeof(ListNodeType)/sizeof(idxtype))*maxnodes + (sizeof(ListNodeType *)/sizeof(idxtype))*j; - - if (WspaceAvail(ctrl) > ncore) { - queue->nodes = (ListNodeType *)idxwspacemalloc(ctrl, (sizeof(ListNodeType)/sizeof(idxtype))*maxnodes); - queue->buckets = (ListNodeType **)idxwspacemalloc(ctrl, (sizeof(ListNodeType *)/sizeof(idxtype))*j); - queue->mustfree = 0; - } - else { /* Not enough memory in the wspace, allocate it */ - queue->nodes = (ListNodeType *)idxmalloc((sizeof(ListNodeType)/sizeof(idxtype))*maxnodes, "PQueueInit: queue->nodes"); - queue->buckets = (ListNodeType **)idxmalloc((sizeof(ListNodeType *)/sizeof(idxtype))*j, "PQueueInit: queue->buckets"); - queue->mustfree = 1; - } - - for (i=0; inodes[i].id = i; - - for (i=0; ibuckets[i] = NULL; - - queue->buckets += queue->ngainspan; /* Advance buckets by the ngainspan proper indexing */ - queue->maxgain = -queue->ngainspan; - } - else { - queue->heap = 
(KeyValueType *)idxwspacemalloc(ctrl, (sizeof(KeyValueType)/sizeof(idxtype))*maxnodes); - queue->locator = idxwspacemalloc(ctrl, maxnodes); - idxset(maxnodes, -1, queue->locator); - } - -} - - -/************************************************************************* -* This function resets the buckets -**************************************************************************/ -void PQueueReset(PQueueType *queue) -{ - int i, j; - queue->nnodes = 0; - - if (queue->type == 1) { - queue->maxgain = -queue->ngainspan; - - j = queue->ngainspan+queue->pgainspan+1; - queue->buckets -= queue->ngainspan; - for (i=0; ibuckets[i] = NULL; - queue->buckets += queue->ngainspan; - } - else { - idxset(queue->maxnodes, -1, queue->locator); - } - -} - - -/************************************************************************* -* This function frees the buckets -**************************************************************************/ -void PQueueFree(CtrlType *ctrl, PQueueType *queue) -{ - - if (queue->type == 1) { - if (queue->mustfree) { - queue->buckets -= queue->ngainspan; - GKfree((void **)&queue->nodes, &queue->buckets, LTERM); - } - else { - idxwspacefree(ctrl, sizeof(ListNodeType *)*(queue->ngainspan+queue->pgainspan+1)/sizeof(idxtype)); - idxwspacefree(ctrl, sizeof(ListNodeType)*queue->maxnodes/sizeof(idxtype)); - } - } - else { - idxwspacefree(ctrl, sizeof(KeyValueType)*queue->maxnodes/sizeof(idxtype)); - idxwspacefree(ctrl, queue->maxnodes); - } - - queue->maxnodes = 0; -} - - -/************************************************************************* -* This function returns the number of nodes in the queue -**************************************************************************/ -int PQueueGetSize(PQueueType *queue) -{ - return queue->nnodes; -} - - -/************************************************************************* -* This function adds a node of certain gain into a partition -**************************************************************************/ 
-int PQueueInsert(PQueueType *queue, int node, int gain) -{ - int i, j, k; - idxtype *locator; - ListNodeType *newnode; - KeyValueType *heap; - - if (queue->type == 1) { - ASSERT(gain >= -queue->ngainspan && gain <= queue->pgainspan); - - /* Allocate and add the node */ - queue->nnodes++; - newnode = queue->nodes + node; - - /* Attach this node in the doubly-linked list */ - newnode->next = queue->buckets[gain]; - newnode->prev = NULL; - if (newnode->next != NULL) - newnode->next->prev = newnode; - queue->buckets[gain] = newnode; - - if (queue->maxgain < gain) - queue->maxgain = gain; - } - else { - ASSERT(CheckHeap(queue)); - - heap = queue->heap; - locator = queue->locator; - - ASSERT(locator[node] == -1); - - i = queue->nnodes++; - while (i > 0) { - j = (i-1)/2; - if (heap[j].key < gain) { - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - ASSERT(i >= 0); - heap[i].key = gain; - heap[i].val = node; - locator[node] = i; - - ASSERT(CheckHeap(queue)); - } - - return 0; -} - - -/************************************************************************* -* This function deletes a node from a partition and reinserts it with -* an updated gain -**************************************************************************/ -int PQueueDelete(PQueueType *queue, int node, int gain) -{ - int i, j, newgain, oldgain; - idxtype *locator; - ListNodeType *newnode, **buckets; - KeyValueType *heap; - - if (queue->type == 1) { - ASSERT(gain >= -queue->ngainspan && gain <= queue->pgainspan); - ASSERT(queue->nnodes > 0); - - buckets = queue->buckets; - queue->nnodes--; - newnode = queue->nodes+node; - - /* Remove newnode from the doubly-linked list */ - if (newnode->prev != NULL) - newnode->prev->next = newnode->next; - else - buckets[gain] = newnode->next; - if (newnode->next != NULL) - newnode->next->prev = newnode->prev; - - if (buckets[gain] == NULL && gain == queue->maxgain) { - if (queue->nnodes == 0) - queue->maxgain = -queue->ngainspan; - else - 
for (; buckets[queue->maxgain]==NULL; queue->maxgain--); - } - } - else { /* Heap Priority Queue */ - heap = queue->heap; - locator = queue->locator; - - ASSERT(locator[node] != -1); - ASSERT(heap[locator[node]].val == node); - - ASSERT(CheckHeap(queue)); - - i = locator[node]; - locator[node] = -1; - - if (--queue->nnodes > 0 && heap[queue->nnodes].val != node) { - node = heap[queue->nnodes].val; - newgain = heap[queue->nnodes].key; - oldgain = heap[i].key; - - if (oldgain < newgain) { /* Filter-up */ - while (i > 0) { - j = (i-1)>>1; - if (heap[j].key < newgain) { - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - } - else { /* Filter down */ - while ((j=2*i+1) < queue->nnodes) { - if (heap[j].key > newgain) { - if (j+1 < queue->nnodes && heap[j+1].key > heap[j].key) - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else if (j+1 < queue->nnodes && heap[j+1].key > newgain) { - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - } - - heap[i].key = newgain; - heap[i].val = node; - locator[node] = i; - } - - ASSERT(CheckHeap(queue)); - } - - return 0; -} - - - -/************************************************************************* -* This function deletes a node from a partition and reinserts it with -* an updated gain -**************************************************************************/ -int PQueueUpdate(PQueueType *queue, int node, int oldgain, int newgain) -{ - int i, j; - idxtype *locator; - ListNodeType *newnode; - KeyValueType *heap; - - if (oldgain == newgain) - return 0; - - if (queue->type == 1) { - /* First delete the node and then insert it */ - PQueueDelete(queue, node, oldgain); - return PQueueInsert(queue, node, newgain); - } - else { /* Heap Priority Queue */ - heap = queue->heap; - locator = queue->locator; - - ASSERT(locator[node] != -1); - ASSERT(heap[locator[node]].val == node); - ASSERT(heap[locator[node]].key == oldgain); - 
ASSERT(CheckHeap(queue)); - - i = locator[node]; - - if (oldgain < newgain) { /* Filter-up */ - while (i > 0) { - j = (i-1)>>1; - if (heap[j].key < newgain) { - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - } - else { /* Filter down */ - while ((j=2*i+1) < queue->nnodes) { - if (heap[j].key > newgain) { - if (j+1 < queue->nnodes && heap[j+1].key > heap[j].key) - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else if (j+1 < queue->nnodes && heap[j+1].key > newgain) { - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - } - - heap[i].key = newgain; - heap[i].val = node; - locator[node] = i; - - ASSERT(CheckHeap(queue)); - } - - return 0; -} - - - -/************************************************************************* -* This function deletes a node from a partition and reinserts it with -* an updated gain -**************************************************************************/ -void PQueueUpdateUp(PQueueType *queue, int node, int oldgain, int newgain) -{ - int i, j; - idxtype *locator; - ListNodeType *newnode, **buckets; - KeyValueType *heap; - - if (oldgain == newgain) - return; - - if (queue->type == 1) { - ASSERT(oldgain >= -queue->ngainspan && oldgain <= queue->pgainspan); - ASSERT(newgain >= -queue->ngainspan && newgain <= queue->pgainspan); - ASSERT(queue->nnodes > 0); - - buckets = queue->buckets; - newnode = queue->nodes+node; - - /* First delete the node */ - if (newnode->prev != NULL) - newnode->prev->next = newnode->next; - else - buckets[oldgain] = newnode->next; - if (newnode->next != NULL) - newnode->next->prev = newnode->prev; - - /* Attach this node in the doubly-linked list */ - newnode->next = buckets[newgain]; - newnode->prev = NULL; - if (newnode->next != NULL) - newnode->next->prev = newnode; - buckets[newgain] = newnode; - - if (queue->maxgain < newgain) - queue->maxgain = newgain; - } - else { /* Heap Priority Queue */ - heap = 
queue->heap; - locator = queue->locator; - - ASSERT(locator[node] != -1); - ASSERT(heap[locator[node]].val == node); - ASSERT(heap[locator[node]].key == oldgain); - ASSERT(CheckHeap(queue)); - - - /* Here we are just filtering up since the newgain is greater than the oldgain */ - i = locator[node]; - while (i > 0) { - j = (i-1)>>1; - if (heap[j].key < newgain) { - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - - heap[i].key = newgain; - heap[i].val = node; - locator[node] = i; - - ASSERT(CheckHeap(queue)); - } - -} - - -/************************************************************************* -* This function returns the vertex with the largest gain from a partition -* and removes the node from the bucket list -**************************************************************************/ -int PQueueGetMax(PQueueType *queue) -{ - int vtx, i, j, gain, node; - idxtype *locator; - ListNodeType *tptr; - KeyValueType *heap; - - if (queue->nnodes == 0) - return -1; - - queue->nnodes--; - - if (queue->type == 1) { - tptr = queue->buckets[queue->maxgain]; - queue->buckets[queue->maxgain] = tptr->next; - if (tptr->next != NULL) { - tptr->next->prev = NULL; - } - else { - if (queue->nnodes == 0) { - queue->maxgain = -queue->ngainspan; - } - else - for (; queue->buckets[queue->maxgain]==NULL; queue->maxgain--); - } - - return tptr->id; - } - else { - heap = queue->heap; - locator = queue->locator; - - vtx = heap[0].val; - locator[vtx] = -1; - - if ((i = queue->nnodes) > 0) { - gain = heap[i].key; - node = heap[i].val; - i = 0; - while ((j=2*i+1) < queue->nnodes) { - if (heap[j].key > gain) { - if (j+1 < queue->nnodes && heap[j+1].key > heap[j].key) - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else if (j+1 < queue->nnodes && heap[j+1].key > gain) { - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - - heap[i].key = gain; - heap[i].val = node; - locator[node] = i; - } - - 
ASSERT(CheckHeap(queue)); - return vtx; - } -} - - -/************************************************************************* -* This function returns the vertex with the largest gain from a partition -**************************************************************************/ -int PQueueSeeMax(PQueueType *queue) -{ - int vtx; - - if (queue->nnodes == 0) - return -1; - - if (queue->type == 1) - vtx = queue->buckets[queue->maxgain]->id; - else - vtx = queue->heap[0].val; - - return vtx; -} - - -/************************************************************************* -* This function returns the vertex with the largest gain from a partition -**************************************************************************/ -int PQueueGetKey(PQueueType *queue) -{ - int key; - - if (queue->nnodes == 0) - return -1; - - if (queue->type == 1) - key = queue->maxgain; - else - key = queue->heap[0].key; - - return key; -} - - - - -/************************************************************************* -* This functions checks the consistency of the heap -**************************************************************************/ -int CheckHeap(PQueueType *queue) -{ - int i, j, nnodes; - idxtype *locator; - KeyValueType *heap; - - heap = queue->heap; - locator = queue->locator; - nnodes = queue->nnodes; - - if (nnodes == 0) - return 1; - - ASSERT(locator[heap[0].val] == 0); - for (i=1; imaxnodes; i++) { - if (locator[i] != -1) - j++; - } - ASSERTP(j == nnodes, ("%d %d\n", j, nnodes)); - - return 1; -} diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/proto.h b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/proto.h index d28329ff..d4ef595d 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/proto.h +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/proto.h @@ -8,511 +8,346 @@ * Started 10/19/95 * George * - * $Id: proto.h,v 1.3 2003/07/24 18:39:11 karypis Exp $ + * $Id: proto.h 20398 2016-11-22 
17:17:12Z karypis $ * */ +#ifndef _LIBMETIS_PROTO_H_ +#define _LIBMETIS_PROTO_H_ + +/* auxapi.c */ + /* balance.c */ -void Balance2Way(CtrlType *, GraphType *, int *, float); -void Bnd2WayBalance(CtrlType *, GraphType *, int *); -void General2WayBalance(CtrlType *, GraphType *, int *); +void Balance2Way(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts); +void Bnd2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts); +void General2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts); +void McGeneral2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts); + /* bucketsort.c */ -void BucketSortKeysInc(int, int, idxtype *, idxtype *, idxtype *); +void BucketSortKeysInc(ctrl_t *ctrl, idx_t n, idx_t max, idx_t *keys, + idx_t *tperm, idx_t *perm); -/* ccgraph.c */ -void CreateCoarseGraph(CtrlType *, GraphType *, int, idxtype *, idxtype *); -void CreateCoarseGraphNoMask(CtrlType *, GraphType *, int, idxtype *, idxtype *); -void CreateCoarseGraph_NVW(CtrlType *, GraphType *, int, idxtype *, idxtype *); -GraphType *SetUpCoarseGraph(GraphType *, int, int); -void ReAdjustMemory(GraphType *, GraphType *, int); /* checkgraph.c */ -int CheckGraph(GraphType *); +int CheckGraph(graph_t *graph, int numflag, int verbose); +int CheckInputGraphWeights(idx_t nvtxs, idx_t ncon, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt); +graph_t *FixGraph(graph_t *graph); + /* coarsen.c */ -GraphType *Coarsen2Way(CtrlType *, GraphType *); +graph_t *CoarsenGraph(ctrl_t *ctrl, graph_t *graph); +graph_t *CoarsenGraphNlevels(ctrl_t *ctrl, graph_t *graph, idx_t nlevels); +idx_t Match_RM(ctrl_t *ctrl, graph_t *graph); +idx_t Match_SHEM(ctrl_t *ctrl, graph_t *graph); +idx_t Match_2Hop(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t nunmatched); +idx_t Match_2HopAny(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree); +idx_t Match_2HopAll(ctrl_t *ctrl, graph_t *graph, idx_t *perm, 
idx_t *match, + idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree); +idx_t Match_JC(ctrl_t *ctrl, graph_t *graph); +void PrintCGraphStats(ctrl_t *ctrl, graph_t *graph); +void CreateCoarseGraph(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, + idx_t *match); +graph_t *SetupCoarseGraph(graph_t *graph, idx_t cnvtxs, int dovsize); +void ReAdjustMemory(ctrl_t *ctrl, graph_t *graph, graph_t *cgraph); + + /* compress.c */ -void CompressGraph(CtrlType *, GraphType *, int, idxtype *, idxtype *, idxtype *, idxtype *); -void PruneGraph(CtrlType *, GraphType *, int, idxtype *, idxtype *, idxtype *, float); +graph_t *CompressGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *cptr, idx_t *cind); +graph_t *PruneGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *iperm, real_t factor); + + +/* contig.c */ +idx_t FindPartitionInducedComponents(graph_t *graph, idx_t *where, + idx_t *cptr, idx_t *cind); +void ComputeBFSOrdering(ctrl_t *ctrl, graph_t *graph, idx_t *bfsperm); +idx_t IsConnected(graph_t *graph, idx_t report); +idx_t IsConnectedSubdomain(ctrl_t *, graph_t *, idx_t, idx_t); +idx_t FindSepInducedComponents(ctrl_t *, graph_t *, idx_t *, idx_t *); +void EliminateComponents(ctrl_t *ctrl, graph_t *graph); +void MoveGroupContigForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid, + idx_t *ptr, idx_t *ind); +void MoveGroupContigForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid, + idx_t *ptr, idx_t *ind, idx_t *vmarker, idx_t *pmarker, + idx_t *modind); + /* debug.c */ -int ComputeCut(GraphType *, idxtype *); -int CheckBnd(GraphType *); -int CheckBnd2(GraphType *); -int CheckNodeBnd(GraphType *, int); -int CheckRInfo(RInfoType *); -int CheckNodePartitionParams(GraphType *); -int IsSeparable(GraphType *); - -/* estmem.c */ -void METIS_EstimateMemory(int *, idxtype *, idxtype *, int *, int *, int *); -void EstimateCFraction(int, idxtype *, idxtype *, float *, float *); -int ComputeCoarseGraphSize(int, idxtype 
*, idxtype *, int, idxtype *, idxtype *, idxtype *); +idx_t ComputeCut(graph_t *graph, idx_t *where); +idx_t ComputeVolume(graph_t *, idx_t *); +idx_t ComputeMaxCut(graph_t *graph, idx_t nparts, idx_t *where); +idx_t CheckBnd(graph_t *); +idx_t CheckBnd2(graph_t *); +idx_t CheckNodeBnd(graph_t *, idx_t); +idx_t CheckRInfo(ctrl_t *ctrl, ckrinfo_t *rinfo); +idx_t CheckNodePartitionParams(graph_t *); +idx_t IsSeparable(graph_t *); +void CheckKWayVolPartitionParams(ctrl_t *ctrl, graph_t *graph); + /* fm.c */ -void FM_2WayEdgeRefine(CtrlType *, GraphType *, int *, int); +void FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter); +void FM_2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter); +void FM_Mc2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter); +void SelectQueue(graph_t *graph, real_t *pijbm, real_t *ubfactors, rpq_t **queues, + idx_t *from, idx_t *cnum); +void Print2WayRefineStats(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + real_t deltabal, idx_t mincutorder); + /* fortran.c */ -void Change2CNumbering(int, idxtype *, idxtype *); -void Change2FNumbering(int, idxtype *, idxtype *, idxtype *); -void Change2FNumbering2(int, idxtype *, idxtype *); -void Change2FNumberingOrder(int, idxtype *, idxtype *, idxtype *, idxtype *); -void ChangeMesh2CNumbering(int, idxtype *); -void ChangeMesh2FNumbering(int, idxtype *, int, idxtype *, idxtype *); -void ChangeMesh2FNumbering2(int, idxtype *, int, int, idxtype *, idxtype *); - -/* frename.c */ -void METIS_PARTGRAPHRECURSIVE(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_partgraphrecursive(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_partgraphrecursive_(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_partgraphrecursive__(int *, idxtype *, idxtype *, idxtype *, idxtype *, 
int *, int *, int *, int *, int *, idxtype *); -void METIS_WPARTGRAPHRECURSIVE(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_wpartgraphrecursive(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_wpartgraphrecursive_(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_wpartgraphrecursive__(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void METIS_PARTGRAPHKWAY(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_partgraphkway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_partgraphkway_(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_partgraphkway__(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void METIS_WPARTGRAPHKWAY(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_wpartgraphkway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_wpartgraphkway_(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_wpartgraphkway__(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void METIS_EDGEND(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_edgend(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_edgend_(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_edgend__(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void 
METIS_NODEND(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_nodend(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_nodend_(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_nodend__(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void METIS_NODEWND(int *, idxtype *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_nodewnd(int *, idxtype *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_nodewnd_(int *, idxtype *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_nodewnd__(int *, idxtype *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void METIS_PARTMESHNODAL(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); -void metis_partmeshnodal(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); -void metis_partmeshnodal_(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); -void metis_partmeshnodal__(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); -void METIS_PARTMESHDUAL(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); -void metis_partmeshdual(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); -void metis_partmeshdual_(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); -void metis_partmeshdual__(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); -void METIS_MESHTONODAL(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_meshtonodal(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_meshtonodal_(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_meshtonodal__(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void METIS_MESHTODUAL(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void 
metis_meshtodual(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_meshtodual_(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void metis_meshtodual__(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void METIS_ESTIMATEMEMORY(int *, idxtype *, idxtype *, int *, int *, int *); -void metis_estimatememory(int *, idxtype *, idxtype *, int *, int *, int *); -void metis_estimatememory_(int *, idxtype *, idxtype *, int *, int *, int *); -void metis_estimatememory__(int *, idxtype *, idxtype *, int *, int *, int *); -void METIS_MCPARTGRAPHRECURSIVE(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_mcpartgraphrecursive(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_mcpartgraphrecursive_(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_mcpartgraphrecursive__(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void METIS_MCPARTGRAPHKWAY(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_mcpartgraphkway(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_mcpartgraphkway_(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_mcpartgraphkway__(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void METIS_PARTGRAPHVKWAY(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_partgraphvkway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_partgraphvkway_(int *, idxtype *, idxtype *, 
idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void metis_partgraphvkway__(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void METIS_WPARTGRAPHVKWAY(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_wpartgraphvkway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_wpartgraphvkway_(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void metis_wpartgraphvkway__(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); +void Change2CNumbering(idx_t, idx_t *, idx_t *); +void Change2FNumbering(idx_t, idx_t *, idx_t *, idx_t *); +void Change2FNumbering2(idx_t, idx_t *, idx_t *); +void Change2FNumberingOrder(idx_t, idx_t *, idx_t *, idx_t *, idx_t *); +void ChangeMesh2CNumbering(idx_t n, idx_t *ptr, idx_t *ind); +void ChangeMesh2FNumbering(idx_t n, idx_t *ptr, idx_t *ind, idx_t nvtxs, + idx_t *xadj, idx_t *adjncy); +void ChangeMesh2FNumbering2(idx_t ne, idx_t nn, idx_t *ptr, idx_t *ind, + idx_t *epart, idx_t *npart); + /* graph.c */ -void SetUpGraph(GraphType *, int, int, int, idxtype *, idxtype *, idxtype *, idxtype *, int); -void SetUpGraphKway(GraphType *, int, idxtype *, idxtype *); -void SetUpGraph2(GraphType *, int, int, idxtype *, idxtype *, float *, idxtype *); -void VolSetUpGraph(GraphType *, int, int, int, idxtype *, idxtype *, idxtype *, idxtype *, int); -void RandomizeGraph(GraphType *); -int IsConnectedSubdomain(CtrlType *, GraphType *, int, int); -int IsConnected(CtrlType *, GraphType *, int); -int IsConnected2(GraphType *, int); -int FindComponents(CtrlType *, GraphType *, idxtype *, idxtype *); +graph_t *SetupGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt); +void 
SetupGraph_tvwgt(graph_t *graph); +void SetupGraph_label(graph_t *graph); +graph_t *SetupSplitGraph(graph_t *graph, idx_t snvtxs, idx_t snedges); +graph_t *CreateGraph(void); +void InitGraph(graph_t *graph); +void FreeSData(graph_t *graph); +void FreeRData(graph_t *graph); +void FreeGraph(graph_t **graph); +void graph_WriteToDisk(ctrl_t *ctrl, graph_t *graph); +void graph_ReadFromDisk(ctrl_t *ctrl, graph_t *graph); + /* initpart.c */ -void Init2WayPartition(CtrlType *, GraphType *, int *, float); -void InitSeparator(CtrlType *, GraphType *, float); -void GrowBisection(CtrlType *, GraphType *, int *, float); -void GrowBisectionNode(CtrlType *, GraphType *, float); -void RandomBisection(CtrlType *, GraphType *, int *, float); +void Init2WayPartition(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void InitSeparator(ctrl_t *ctrl, graph_t *graph, idx_t niparts); +void RandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void GrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void McRandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void McGrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void GrowBisectionNode(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void GrowBisectionNode2(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); + /* kmetis.c */ -void METIS_PartGraphKway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void METIS_WPartGraphKway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -int MlevelKWayPartitioning(CtrlType *, GraphType *, int, idxtype *, float *, float); +idx_t MlevelKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part); +void InitKWayPartitioning(ctrl_t *ctrl, graph_t *graph); +idx_t BlockKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part); +idx_t GrowMultisection(ctrl_t 
*ctrl, graph_t *graph, idx_t nparts, idx_t *where); +void BalanceAndRefineLP(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *where); -/* kvmetis.c */ -void METIS_PartGraphVKway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void METIS_WPartGraphVKway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -int MlevelVolKWayPartitioning(CtrlType *, GraphType *, int, idxtype *, float *, float); /* kwayfm.c */ -void Random_KWayEdgeRefine(CtrlType *, GraphType *, int, float *, float, int, int); -void Greedy_KWayEdgeRefine(CtrlType *, GraphType *, int, float *, float, int); -void Greedy_KWayEdgeBalance(CtrlType *, GraphType *, int, float *, float, int); +void Greedy_KWayOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +void Greedy_KWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +void Greedy_KWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +void Greedy_McKWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +void Greedy_McKWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +idx_t IsArticulationNode(idx_t i, idx_t *xadj, idx_t *adjncy, idx_t *where, + idx_t *bfslvl, idx_t *bfsind, idx_t *bfsmrk); +void KWayVolUpdate(ctrl_t *ctrl, graph_t *graph, idx_t v, idx_t from, + idx_t to, ipq_t *queue, idx_t *vstatus, idx_t *r_nupd, idx_t *updptr, + idx_t *updind, idx_t bndtype, idx_t *vmarker, idx_t *pmarker, + idx_t *modind); +void Greedy_KWayEdgeStats(ctrl_t *ctrl, graph_t *graph); +void Greedy_KWayEdgeCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter); + /* kwayrefine.c */ -void RefineKWay(CtrlType *, GraphType *, GraphType *, int, float *, float); -void AllocateKWayPartitionMemory(CtrlType *, GraphType *, int); -void ComputeKWayPartitionParams(CtrlType *, GraphType *, int); 
-void ProjectKWayPartition(CtrlType *, GraphType *, int); -int IsBalanced(idxtype *, int, float *, float); -void ComputeKWayBoundary(CtrlType *, GraphType *, int); -void ComputeKWayBalanceBoundary(CtrlType *, GraphType *, int); - -/* kwayvolfm.c */ -void Random_KWayVolRefine(CtrlType *, GraphType *, int, float *, float, int, int); -void Random_KWayVolRefineMConn(CtrlType *, GraphType *, int, float *, float, int, int); -void Greedy_KWayVolBalance(CtrlType *, GraphType *, int, float *, float, int); -void Greedy_KWayVolBalanceMConn(CtrlType *, GraphType *, int, float *, float, int); -void KWayVolUpdate(CtrlType *, GraphType *, int, int, int, idxtype *, idxtype *, idxtype *); -void ComputeKWayVolume(GraphType *, int, idxtype *, idxtype *, idxtype *); -int ComputeVolume(GraphType *, idxtype *); -void CheckVolKWayPartitionParams(CtrlType *, GraphType *, int); -void ComputeVolSubDomainGraph(GraphType *, int, idxtype *, idxtype *); -void EliminateVolSubDomainEdges(CtrlType *, GraphType *, int, float *); -void EliminateVolComponents(CtrlType *, GraphType *, int, float *, float); - -/* kwayvolrefine.c */ -void RefineVolKWay(CtrlType *, GraphType *, GraphType *, int, float *, float); -void AllocateVolKWayPartitionMemory(CtrlType *, GraphType *, int); -void ComputeVolKWayPartitionParams(CtrlType *, GraphType *, int); -void ComputeKWayVolGains(CtrlType *, GraphType *, int); -void ProjectVolKWayPartition(CtrlType *, GraphType *, int); -void ComputeVolKWayBoundary(CtrlType *, GraphType *, int); -void ComputeVolKWayBalanceBoundary(CtrlType *, GraphType *, int); - -/* match.c */ -void Match_RM(CtrlType *, GraphType *); -void Match_RM_NVW(CtrlType *, GraphType *); -void Match_HEM(CtrlType *, GraphType *); -void Match_SHEM(CtrlType *, GraphType *); - -/* mbalance.c */ -void MocBalance2Way(CtrlType *, GraphType *, float *, float); -void MocGeneral2WayBalance(CtrlType *, GraphType *, float *, float); - -/* mbalance2.c */ -void MocBalance2Way2(CtrlType *, GraphType *, float *, float *); 
-void MocGeneral2WayBalance2(CtrlType *, GraphType *, float *, float *); -void SelectQueue3(int, float *, float *, int *, int *, PQueueType [MAXNCON][2], float *); - -/* mcoarsen.c */ -GraphType *MCCoarsen2Way(CtrlType *, GraphType *); - -/* memory.c */ -void AllocateWorkSpace(CtrlType *, GraphType *, int); -void FreeWorkSpace(CtrlType *, GraphType *); -int WspaceAvail(CtrlType *); -idxtype *idxwspacemalloc(CtrlType *, int); -void idxwspacefree(CtrlType *, int); -float *fwspacemalloc(CtrlType *, int); -void fwspacefree(CtrlType *, int); -GraphType *CreateGraph(void); -void InitGraph(GraphType *); -void FreeGraph(GraphType *); +void RefineKWay(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph); +void AllocateKWayPartitionMemory(ctrl_t *ctrl, graph_t *graph); +void ComputeKWayPartitionParams(ctrl_t *ctrl, graph_t *graph); +void ProjectKWayPartition(ctrl_t *ctrl, graph_t *graph); +void ComputeKWayBoundary(ctrl_t *ctrl, graph_t *graph, idx_t bndtype); +void ComputeKWayVolGains(ctrl_t *ctrl, graph_t *graph); +int IsBalanced(ctrl_t *ctrl, graph_t *graph, real_t ffactor); + + +/* mcutil.c */ +int rvecle(idx_t n, real_t *x, real_t *y); +int rvecge(idx_t n, real_t *x, real_t *y); +int rvecsumle(idx_t n, real_t *x1, real_t *x2, real_t *y); +real_t rvecmaxdiff(idx_t n, real_t *x, real_t *y); +int ivecle(idx_t n, idx_t *x, idx_t *z); +int ivecge(idx_t n, idx_t *x, idx_t *z); +int ivecaxpylez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z); +int ivecaxpygez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z); +int BetterVBalance(idx_t ncon, real_t *itvwgt, idx_t *v_vwgt, idx_t *u1_vwgt, + idx_t *u2_vwgt); +int BetterBalance2Way(idx_t n, real_t *x, real_t *y); +int BetterBalanceKWay(idx_t ncon, idx_t *vwgt, real_t *itvwgt, idx_t a1, + idx_t *pt1, real_t *bm1, idx_t a2, idx_t *pt2, real_t *bm2); +real_t ComputeLoadImbalance(graph_t *graph, idx_t nparts, real_t *pijbm); +real_t ComputeLoadImbalanceDiff(graph_t *graph, idx_t nparts, real_t *pijbm, + real_t *ubvec); +real_t 
ComputeLoadImbalanceDiffVec(graph_t *graph, idx_t nparts, real_t *pijbm, + real_t *ubfactors, real_t *diffvec); +void ComputeLoadImbalanceVec(graph_t *graph, idx_t nparts, real_t *pijbm, + real_t *lbvec); + /* mesh.c */ -void METIS_MeshToDual(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void METIS_MeshToNodal(int *, int *, idxtype *, int *, int *, idxtype *, idxtype *); -void GENDUALMETIS(int, int, int, idxtype *, idxtype *, idxtype *adjncy); -void TRINODALMETIS(int, int, idxtype *, idxtype *, idxtype *adjncy); -void TETNODALMETIS(int, int, idxtype *, idxtype *, idxtype *adjncy); -void HEXNODALMETIS(int, int, idxtype *, idxtype *, idxtype *adjncy); -void QUADNODALMETIS(int, int, idxtype *, idxtype *, idxtype *adjncy); +void CreateGraphDual(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t ncommon, + idx_t **r_xadj, idx_t **r_adjncy); +idx_t FindCommonElements(idx_t qid, idx_t elen, idx_t *eind, idx_t *nptr, + idx_t *nind, idx_t *eptr, idx_t ncommon, idx_t *marker, idx_t *nbrs); +void CreateGraphNodal(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t **r_xadj, + idx_t **r_adjncy); +idx_t FindCommonNodes(idx_t qid, idx_t nelmnts, idx_t *elmntids, idx_t *eptr, + idx_t *eind, idx_t *marker, idx_t *nbrs); +mesh_t *CreateMesh(void); +void InitMesh(mesh_t *mesh); +void FreeMesh(mesh_t **mesh); + /* meshpart.c */ -void METIS_PartMeshNodal(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); -void METIS_PartMeshDual(int *, int *, idxtype *, int *, int *, int *, int *, idxtype *, idxtype *); +void InduceRowPartFromColumnPart(idx_t nrows, idx_t *rowptr, idx_t *rowind, + idx_t *rpart, idx_t *cpart, idx_t nparts, real_t *tpwgts); -/* mfm.c */ -void MocFM_2WayEdgeRefine(CtrlType *, GraphType *, float *, int); -void SelectQueue(int, float *, float *, int *, int *, PQueueType [MAXNCON][2]); -int BetterBalance(int, float *, float *, float *); -float Compute2WayHLoadImbalance(int, float *, float *); -void Compute2WayHLoadImbalanceVec(int, 
float *, float *, float *); -/* mfm2.c */ -void MocFM_2WayEdgeRefine2(CtrlType *, GraphType *, float *, float *, int); -void SelectQueue2(int, float *, float *, int *, int *, PQueueType [MAXNCON][2], float *); -int IsBetter2wayBalance(int, float *, float *, float *); +/* minconn.c */ +void ComputeSubDomainGraph(ctrl_t *ctrl, graph_t *graph); +void UpdateEdgeSubDomainGraph(ctrl_t *ctrl, idx_t u, idx_t v, idx_t ewgt, + idx_t *r_maxndoms); +void PrintSubDomainGraph(graph_t *graph, idx_t nparts, idx_t *where); +void EliminateSubDomainEdges(ctrl_t *ctrl, graph_t *graph); +void MoveGroupMinConnForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, + idx_t *ind); +void MoveGroupMinConnForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, + idx_t *ind, idx_t *vmarker, idx_t *pmarker, idx_t *modind); + /* mincover.o */ -void MinCover(idxtype *, idxtype *, int, int, idxtype *, int *); -int MinCover_Augment(idxtype *, idxtype *, int, idxtype *, idxtype *, idxtype *, int); -void MinCover_Decompose(idxtype *, idxtype *, int, int, idxtype *, idxtype *, int *); -void MinCover_ColDFS(idxtype *, idxtype *, int, idxtype *, idxtype *, int); -void MinCover_RowDFS(idxtype *, idxtype *, int, idxtype *, idxtype *, int); - -/* minitpart.c */ -void MocInit2WayPartition(CtrlType *, GraphType *, float *, float); -void MocGrowBisection(CtrlType *, GraphType *, float *, float); -void MocRandomBisection(CtrlType *, GraphType *, float *, float); -void MocInit2WayBalance(CtrlType *, GraphType *, float *); -int SelectQueueoneWay(int, float *, float *, int, PQueueType [MAXNCON][2]); - -/* minitpart2.c */ -void MocInit2WayPartition2(CtrlType *, GraphType *, float *, float *); -void MocGrowBisection2(CtrlType *, GraphType *, float *, float *); -void MocGrowBisectionNew2(CtrlType *, GraphType *, float *, float *); -void MocInit2WayBalance2(CtrlType *, GraphType *, float *, float *); -int SelectQueueOneWay2(int, float *, PQueueType [MAXNCON][2], float *); - -/* mkmetis.c */ -void 
METIS_mCPartGraphKway(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -int MCMlevelKWayPartitioning(CtrlType *, GraphType *, int, idxtype *, float *); - -/* mkwayfmh.c */ -void MCRandom_KWayEdgeRefineHorizontal(CtrlType *, GraphType *, int, float *, int); -void MCGreedy_KWayEdgeBalanceHorizontal(CtrlType *, GraphType *, int, float *, int); -int AreAllHVwgtsBelow(int, float, float *, float, float *, float *); -int AreAllHVwgtsAbove(int, float, float *, float, float *, float *); -void ComputeHKWayLoadImbalance(int, int, float *, float *); -int MocIsHBalanced(int, int, float *, float *); -int IsHBalanceBetterFT(int, int, float *, float *, float *, float *); -int IsHBalanceBetterTT(int, int, float *, float *, float *, float *); - -/* mkwayrefine.c */ -void MocRefineKWayHorizontal(CtrlType *, GraphType *, GraphType *, int, float *); -void MocAllocateKWayPartitionMemory(CtrlType *, GraphType *, int); -void MocComputeKWayPartitionParams(CtrlType *, GraphType *, int); -void MocProjectKWayPartition(CtrlType *, GraphType *, int); -void MocComputeKWayBalanceBoundary(CtrlType *, GraphType *, int); - -/* mmatch.c */ -void MCMatch_RM(CtrlType *, GraphType *); -void MCMatch_HEM(CtrlType *, GraphType *); -void MCMatch_SHEM(CtrlType *, GraphType *); -void MCMatch_SHEBM(CtrlType *, GraphType *, int); -void MCMatch_SBHEM(CtrlType *, GraphType *, int); -float BetterVBalance(int, int, float *, float *, float *); -int AreAllVwgtsBelowFast(int, float *, float *, float); +void MinCover(idx_t *, idx_t *, idx_t, idx_t, idx_t *, idx_t *); +idx_t MinCover_Augment(idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t *, idx_t); +void MinCover_Decompose(idx_t *, idx_t *, idx_t, idx_t, idx_t *, idx_t *, idx_t *); +void MinCover_ColDFS(idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t); +void MinCover_RowDFS(idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t); + /* mmd.c */ -void genmmd(int, idxtype *, idxtype *, idxtype *, idxtype *, int 
, idxtype *, idxtype *, idxtype *, idxtype *, int, int *); -void mmdelm(int, idxtype *xadj, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, int, int); -int mmdint(int, idxtype *xadj, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *); -void mmdnum(int, idxtype *, idxtype *, idxtype *); -void mmdupd(int, int, idxtype *, idxtype *, int, int *, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, int, int *tag); - -/* mpmetis.c */ -void METIS_mCPartGraphRecursive(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void METIS_mCHPartGraphRecursive(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void METIS_mCPartGraphRecursiveInternal(int *, int *, idxtype *, idxtype *, float *, idxtype *, int *, int *, int *, idxtype *); -void METIS_mCHPartGraphRecursiveInternal(int *, int *, idxtype *, idxtype *, float *, idxtype *, int *, float *, int *, int *, idxtype *); -int MCMlevelRecursiveBisection(CtrlType *, GraphType *, int, idxtype *, float, int); -int MCHMlevelRecursiveBisection(CtrlType *, GraphType *, int, idxtype *, float *, int); -void MCMlevelEdgeBisection(CtrlType *, GraphType *, float *, float); -void MCHMlevelEdgeBisection(CtrlType *, GraphType *, float *, float *); - -/* mrefine.c */ -void MocRefine2Way(CtrlType *, GraphType *, GraphType *, float *, float); -void MocAllocate2WayPartitionMemory(CtrlType *, GraphType *); -void MocCompute2WayPartitionParams(CtrlType *, GraphType *); -void MocProject2WayPartition(CtrlType *, GraphType *); - -/* mrefine2.c */ -void MocRefine2Way2(CtrlType *, GraphType *, GraphType *, float *, float *); - -/* mutil.c */ -int AreAllVwgtsBelow(int, float, float *, float, float *, float); -int AreAnyVwgtsBelow(int, float, float *, float, float *, float); -int AreAllVwgtsAbove(int, float, float *, float, float *, float); -float ComputeLoadImbalance(int, 
int, float *, float *); -int AreAllBelow(int, float *, float *); - -/* myqsort.c */ -void iidxsort(int, idxtype *); -void iintsort(int, int *); -void ikeysort(int, KeyValueType *); -void ikeyvalsort(int, KeyValueType *); +void genmmd(idx_t, idx_t *, idx_t *, idx_t *, idx_t *, idx_t , idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t *); +void mmdelm(idx_t, idx_t *xadj, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t); +idx_t mmdint(idx_t, idx_t *xadj, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *); +void mmdnum(idx_t, idx_t *, idx_t *, idx_t *); +void mmdupd(idx_t, idx_t, idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t *tag); -/* ometis.c */ -void METIS_EdgeND(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void METIS_NodeND(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void METIS_NodeWND(int *, idxtype *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void MlevelNestedDissection(CtrlType *, GraphType *, idxtype *, float, int); -void MlevelNestedDissectionCC(CtrlType *, GraphType *, idxtype *, float, int); -void MlevelNodeBisectionMultiple(CtrlType *, GraphType *, int *, float); -void MlevelNodeBisection(CtrlType *, GraphType *, int *, float); -void SplitGraphOrder(CtrlType *, GraphType *, GraphType *, GraphType *); -void MMDOrder(CtrlType *, GraphType *, idxtype *, int); -int SplitGraphOrderCC(CtrlType *, GraphType *, GraphType *, int, idxtype *, idxtype *); -/* parmetis.c */ -void METIS_PartGraphKway2(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void METIS_WPartGraphKway2(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void METIS_NodeNDP(int, idxtype *, idxtype *, int, int *, idxtype *, idxtype *, idxtype *); -void MlevelNestedDissectionP(CtrlType *, GraphType *, idxtype *, int, int, int, idxtype *); 
-void METIS_NodeComputeSeparator(int *, idxtype *, idxtype *, idxtype *, idxtype *, float *, int *, int *, idxtype *); -void METIS_EdgeComputeSeparator(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, idxtype *); -void METIS_mCPartGraphRecursive2(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part); -int MCMlevelRecursiveBisection2(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, idxtype *part, float ubfactor, int fpart); -void METIS_NodeRefine(int nvtxs, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, - idxtype *adjwgt, idxtype *where, idxtype *hmarker, float ubfactor); -void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, idxtype *hmarker, - float ubfactor, int npasses); -void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, idxtype *hmarker, - float ubfactor, int npasses); +/* ometis.c */ +void MlevelNestedDissection(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx); +void MlevelNestedDissectionCC(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx); +void MlevelNodeBisectionMultiple(ctrl_t *ctrl, graph_t *graph); +void MlevelNodeBisectionL2(ctrl_t *ctrl, graph_t *graph, idx_t niparts); +void MlevelNodeBisectionL1(ctrl_t *ctrl, graph_t *graph, idx_t niparts); +void SplitGraphOrder(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, + graph_t **r_rgraph); +graph_t **SplitGraphOrderCC(ctrl_t *ctrl, graph_t *graph, idx_t ncmps, + idx_t *cptr, idx_t *cind); +void MMDOrder(ctrl_t *ctrl, graph_t *graph, idx_t *order, idx_t lastvtx); + + +/* options.c */ +ctrl_t *SetupCtrl(moptype_et optype, idx_t *options, idx_t ncon, idx_t nparts, + real_t *tpwgts, real_t *ubvec); +void SetupKWayBalMultipliers(ctrl_t *ctrl, graph_t *graph); +void Setup2WayBalMultipliers(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts); +void PrintCtrl(ctrl_t *ctrl); +int CheckParams(ctrl_t *ctrl); +void 
FreeCtrl(ctrl_t **r_ctrl); +/* parmetis.c */ +void MlevelNestedDissectionP(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx, idx_t npes, idx_t cpos, idx_t *sizes); +void FM_2WayNodeRefine1SidedP(ctrl_t *ctrl, graph_t *graph, idx_t *hmarker, + real_t ubfactor, idx_t npasses); +void FM_2WayNodeRefine2SidedP(ctrl_t *ctrl, graph_t *graph, idx_t *hmarker, + real_t ubfactor, idx_t npasses); /* pmetis.c */ -void METIS_PartGraphRecursive(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void METIS_WPartGraphRecursive(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -int MlevelRecursiveBisection(CtrlType *, GraphType *, int, idxtype *, float *, float, int); -void MlevelEdgeBisection(CtrlType *, GraphType *, int *, float); -void SplitGraphPart(CtrlType *, GraphType *, GraphType *, GraphType *); -void SetUpSplitGraph(GraphType *, GraphType *, int, int); - -/* pqueue.c */ -void PQueueInit(CtrlType *ctrl, PQueueType *, int, int); -void PQueueReset(PQueueType *); -void PQueueFree(CtrlType *ctrl, PQueueType *); -int PQueueGetSize(PQueueType *); -int PQueueInsert(PQueueType *, int, int); -int PQueueDelete(PQueueType *, int, int); -int PQueueUpdate(PQueueType *, int, int, int); -void PQueueUpdateUp(PQueueType *, int, int, int); -int PQueueGetMax(PQueueType *); -int PQueueSeeMax(PQueueType *); -int PQueueGetKey(PQueueType *); -int CheckHeap(PQueueType *); +idx_t MlevelRecursiveBisection(ctrl_t *ctrl, graph_t *graph, idx_t nparts, + idx_t *part, real_t *tpwgts, idx_t fpart); +idx_t MultilevelBisect(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts); +void SplitGraphPart(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, graph_t **r_rgraph); + /* refine.c */ -void Refine2Way(CtrlType *, GraphType *, GraphType *, int *, float ubfactor); -void Allocate2WayPartitionMemory(CtrlType *, GraphType *); -void Compute2WayPartitionParams(CtrlType *, GraphType *); -void 
Project2WayPartition(CtrlType *, GraphType *); +void Refine2Way(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph, real_t *rtpwgts); +void Allocate2WayPartitionMemory(ctrl_t *ctrl, graph_t *graph); +void Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *graph); +void Project2WayPartition(ctrl_t *ctrl, graph_t *graph); + /* separator.c */ -void ConstructSeparator(CtrlType *, GraphType *, float); -void ConstructMinCoverSeparator0(CtrlType *, GraphType *, float); -void ConstructMinCoverSeparator(CtrlType *, GraphType *, float); +void ConstructSeparator(ctrl_t *ctrl, graph_t *graph); +void ConstructMinCoverSeparator(ctrl_t *ctrl, graph_t *graph); + /* sfm.c */ -void FM_2WayNodeRefine(CtrlType *, GraphType *, float, int); -void FM_2WayNodeRefineEqWgt(CtrlType *, GraphType *, int); -void FM_2WayNodeRefine_OneSided(CtrlType *, GraphType *, float, int); -void FM_2WayNodeBalance(CtrlType *, GraphType *, float); -int ComputeMaxNodeGain(int, idxtype *, idxtype *, idxtype *); +void FM_2WayNodeRefine2Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter); +void FM_2WayNodeRefine1Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter); +void FM_2WayNodeBalance(ctrl_t *ctrl, graph_t *graph); + /* srefine.c */ -void Refine2WayNode(CtrlType *, GraphType *, GraphType *, float); -void Allocate2WayNodePartitionMemory(CtrlType *, GraphType *); -void Compute2WayNodePartitionParams(CtrlType *, GraphType *); -void Project2WayNodePartition(CtrlType *, GraphType *); +void Refine2WayNode(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph); +void Allocate2WayNodePartitionMemory(ctrl_t *ctrl, graph_t *graph); +void Compute2WayNodePartitionParams(ctrl_t *ctrl, graph_t *graph); +void Project2WayNodePartition(ctrl_t *ctrl, graph_t *graph); + /* stat.c */ -void ComputePartitionInfo(GraphType *, int, idxtype *); -void ComputePartitionInfoBipartite(GraphType *, int, idxtype *); -void ComputePartitionBalance(GraphType *, int, idxtype *, float *); -float ComputeElementBalance(int, int, idxtype *); -void 
Moc_ComputePartitionBalance(GraphType *graph, int nparts, idxtype *where, float *ubvec); - -/* subdomains.c */ -void Random_KWayEdgeRefineMConn(CtrlType *, GraphType *, int, float *, float, int, int); -void Greedy_KWayEdgeBalanceMConn(CtrlType *, GraphType *, int, float *, float, int); -void PrintSubDomainGraph(GraphType *, int, idxtype *); -void ComputeSubDomainGraph(GraphType *, int, idxtype *, idxtype *); -void EliminateSubDomainEdges(CtrlType *, GraphType *, int, float *); -void MoveGroupMConn(CtrlType *, GraphType *, idxtype *, idxtype *, int, int, int, idxtype *); -void EliminateComponents(CtrlType *, GraphType *, int, float *, float); -void MoveGroup(CtrlType *, GraphType *, int, int, int, idxtype *, idxtype *); +void ComputePartitionInfoBipartite(graph_t *, idx_t, idx_t *); +void ComputePartitionBalance(graph_t *, idx_t, idx_t *, real_t *); +real_t ComputeElementBalance(idx_t, idx_t, idx_t *); + /* timing.c */ -void InitTimers(CtrlType *); -void PrintTimers(CtrlType *); -double seconds(void); +void InitTimers(ctrl_t *); +void PrintTimers(ctrl_t *); /* util.c */ -void errexit(char *,...); -#ifndef DMALLOC -int *imalloc(int, char *); -idxtype *idxmalloc(int, char *); -float *fmalloc(int, char *); -int *ismalloc(int, int, char *); -idxtype *idxsmalloc(int, idxtype, char *); -void *GKmalloc(int, char *); -#endif -/*void GKfree(void **,...); */ -int *iset(int n, int val, int *x); -idxtype *idxset(int n, idxtype val, idxtype *x); -float *sset(int n, float val, float *x); -int iamax(int, int *); -int idxamax(int, idxtype *); -int idxamax_strd(int, idxtype *, int); -int samax(int, float *); -int samax2(int, float *); -int idxamin(int, idxtype *); -int samin(int, float *); -int idxsum(int, idxtype *); -int idxsum_strd(int, idxtype *, int); -void idxadd(int, idxtype *, idxtype *); -int charsum(int, char *); -int isum(int, int *); -float ssum(int, float *); -float ssum_strd(int n, float *x, int); -void sscale(int n, float, float *x); -float snorm2(int, float *); 
-float sdot(int n, float *, float *); -void saxpy(int, float, float *, int, float *, int); -void RandomPermute(int, idxtype *, int); -int ispow2(int); -void InitRandom(int); -int log2Int(int); - - - - - - - - - - -/*************************************************************** -* Programs Directory -****************************************************************/ - -/* io.c */ -void ReadGraph(GraphType *, char *, int *); -void WritePartition(char *, idxtype *, int, int); -void WriteMeshPartition(char *, int, int, idxtype *, int, idxtype *); -void WritePermutation(char *, idxtype *, int); -int CheckGraph(GraphType *); -idxtype *ReadMesh(char *, int *, int *, int *); -void WriteGraph(char *, int, idxtype *, idxtype *); - -/* smbfactor.c */ -void ComputeFillIn(GraphType *, idxtype *); -idxtype ComputeFillIn2(GraphType *, idxtype *); -int smbfct(int, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, int *, idxtype *, idxtype *, int *); - - -/*************************************************************** -* Test Directory -****************************************************************/ -void Test_PartGraph(int, idxtype *, idxtype *); -int VerifyPart(int, idxtype *, idxtype *, idxtype *, idxtype *, int, int, idxtype *); -int VerifyWPart(int, idxtype *, idxtype *, idxtype *, idxtype *, int, float *, int, idxtype *); -void Test_PartGraphV(int, idxtype *, idxtype *); -int VerifyPartV(int, idxtype *, idxtype *, idxtype *, idxtype *, int, int, idxtype *); -int VerifyWPartV(int, idxtype *, idxtype *, idxtype *, idxtype *, int, float *, int, idxtype *); -void Test_PartGraphmC(int, idxtype *, idxtype *); -int VerifyPartmC(int, int, idxtype *, idxtype *, idxtype *, idxtype *, int, float *, int, idxtype *); -void Test_ND(int, idxtype *, idxtype *); -int VerifyND(int, idxtype *, idxtype *); +idx_t iargmax_strd(size_t, idx_t *, idx_t); +idx_t iargmax_nrm(size_t n, idx_t *x, real_t *y); +idx_t iargmax2_nrm(size_t n, idx_t *x, real_t *y); +idx_t rargmax2(size_t, real_t *); 
+void InitRandom(idx_t); +int metis_rcode(int sigrval); + + + +/* wspace.c */ +void AllocateWorkSpace(ctrl_t *ctrl, graph_t *graph); +void AllocateRefinementWorkSpace(ctrl_t *ctrl, idx_t nbrpoolsize_max, idx_t nbrpoolsize); +void FreeWorkSpace(ctrl_t *ctrl); +void *wspacemalloc(ctrl_t *ctrl, size_t nbytes); +void wspacepush(ctrl_t *ctrl); +void wspacepop(ctrl_t *ctrl); +idx_t *iwspacemalloc(ctrl_t *, idx_t); +real_t *rwspacemalloc(ctrl_t *, idx_t); +ikv_t *ikvwspacemalloc(ctrl_t *, idx_t); +void cnbrpoolReset(ctrl_t *ctrl); +idx_t cnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs); +void vnbrpoolReset(ctrl_t *ctrl); +idx_t vnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs); + +#endif diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/refine.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/refine.c index f479298c..9a9fc0e3 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/refine.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/refine.c @@ -1,26 +1,23 @@ /* - * Copyright 1997, Regents of the University of Minnesota - * - * refine.c - * - * This file contains the driving routines for multilevel refinement - * - * Started 7/24/97 - * George - * - * $Id: refine.c,v 1.1 2003/07/16 15:55:17 karypis Exp $ - */ - -#include - - -/************************************************************************* -* This function is the entry point of refinement -**************************************************************************/ -void Refine2Way(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, int *tpwgts, float ubfactor) +\file +\brief This file contains the driving routines for multilevel refinement + +\date Started 7/24/1997 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: refine.c 14362 2013-05-21 21:35:23Z karypis $ \endverbatim +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! 
This function is the entry point of refinement */ +/*************************************************************************/ +void Refine2Way(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph, real_t *tpwgts) { - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->UncoarsenTmr)); /* Compute the parameters of the coarsest graph */ Compute2WayPartitionParams(ctrl, graph); @@ -28,177 +25,192 @@ void Refine2Way(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, int *tpwg for (;;) { ASSERT(CheckBnd(graph)); - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RefTmr)); - switch (ctrl->RType) { - case 1: - Balance2Way(ctrl, graph, tpwgts, ubfactor); - FM_2WayEdgeRefine(ctrl, graph, tpwgts, 8); - break; - default: - errexit("Unknown refinement type: %d\n", ctrl->RType); - } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->RefTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->RefTmr)); + + Balance2Way(ctrl, graph, tpwgts); + + FM_2WayRefine(ctrl, graph, tpwgts, ctrl->niter); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->RefTmr)); if (graph == orggraph) break; graph = graph->finer; - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); + graph_ReadFromDisk(ctrl, graph); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ProjectTmr)); Project2WayPartition(ctrl, graph); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ProjectTmr)); } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->UncoarsenTmr)); } -/************************************************************************* -* This function allocates memory for 2-way edge refinement -**************************************************************************/ -void Allocate2WayPartitionMemory(CtrlType *ctrl, GraphType *graph) 
+/*************************************************************************/ +/*! This function allocates memory for 2-way edge refinement */ +/*************************************************************************/ +void Allocate2WayPartitionMemory(ctrl_t *ctrl, graph_t *graph) { - int nvtxs; + idx_t nvtxs, ncon; nvtxs = graph->nvtxs; - - graph->rdata = idxmalloc(5*nvtxs+2, "Allocate2WayPartitionMemory: rdata"); - graph->pwgts = graph->rdata; - graph->where = graph->rdata + 2; - graph->id = graph->rdata + nvtxs + 2; - graph->ed = graph->rdata + 2*nvtxs + 2; - graph->bndptr = graph->rdata + 3*nvtxs + 2; - graph->bndind = graph->rdata + 4*nvtxs + 2; + ncon = graph->ncon; + + graph->pwgts = imalloc(2*ncon, "Allocate2WayPartitionMemory: pwgts"); + graph->where = imalloc(nvtxs, "Allocate2WayPartitionMemory: where"); + graph->bndptr = imalloc(nvtxs, "Allocate2WayPartitionMemory: bndptr"); + graph->bndind = imalloc(nvtxs, "Allocate2WayPartitionMemory: bndind"); + graph->id = imalloc(nvtxs, "Allocate2WayPartitionMemory: id"); + graph->ed = imalloc(nvtxs, "Allocate2WayPartitionMemory: ed"); } -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void Compute2WayPartitionParams(CtrlType *ctrl, GraphType *graph) +/*************************************************************************/ +/*! 
This function computes the initial id/ed */ +/*************************************************************************/ +void Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, l, nvtxs, nbnd, mincut; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *pwgts; - idxtype *id, *ed, *where; - idxtype *bndptr, *bndind; - int me, other; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - vwgt = graph->vwgt; + idx_t i, j, nvtxs, ncon, nbnd, mincut, istart, iend, tid, ted, me; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *pwgts; + idx_t *where, *bndptr, *bndind, *id, *ed; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - pwgts = idxset(2, 0, graph->pwgts); - id = idxset(nvtxs, 0, graph->id); - ed = idxset(nvtxs, 0, graph->ed); - bndptr = idxset(nvtxs, -1, graph->bndptr); + where = graph->where; + id = graph->id; + ed = graph->ed; + + pwgts = iset(2*ncon, 0, graph->pwgts); + bndptr = iset(nvtxs, -1, graph->bndptr); bndind = graph->bndind; + /* Compute pwgts */ + if (ncon == 1) { + for (i=0; i= 0 && where[i] <= 1); + pwgts[where[i]] += vwgt[i]; + } + ASSERT(pwgts[0]+pwgts[1] == graph->tvwgt[0]); + } + else { + for (i=0; i= 0 && where[i] <= 1); me = where[i]; - pwgts[me] += vwgt[i]; + tid = ted = 0; - for (j=xadj[i]; j 0 || xadj[i] == xadj[i+1]) { - mincut += ed[i]; - bndptr[i] = nbnd; - bndind[nbnd++] = i; + id[i] = tid; + ed[i] = ted; + + if (ted > 0 || istart == iend) { + BNDInsert(nbnd, bndind, bndptr, i); + mincut += ted; } } graph->mincut = mincut/2; - graph->nbnd = nbnd; + graph->nbnd = nbnd; - ASSERT(pwgts[0]+pwgts[1] == idxsum(nvtxs, vwgt)); } - -/************************************************************************* -* This function projects a partition, and at the same time computes the -* parameters for refinement. 
-**************************************************************************/ -void Project2WayPartition(CtrlType *ctrl, GraphType *graph) +/*************************************************************************/ +/*! Projects a partition and computes the refinement params. */ +/*************************************************************************/ +void Project2WayPartition(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, nvtxs, nbnd, me; - idxtype *xadj, *adjncy, *adjwgt, *adjwgtsum; - idxtype *cmap, *where, *id, *ed, *bndptr, *bndind; - idxtype *cwhere, *cid, *ced, *cbndptr; - GraphType *cgraph; - - cgraph = graph->coarser; - cwhere = cgraph->where; - cid = cgraph->id; - ced = cgraph->ed; + idx_t i, j, istart, iend, nvtxs, nbnd, me, tid, ted; + idx_t *xadj, *adjncy, *adjwgt; + idx_t *cmap, *where, *bndptr, *bndind; + idx_t *cwhere, *cbndptr; + idx_t *id, *ed; + graph_t *cgraph; + int dropedges; + + Allocate2WayPartitionMemory(ctrl, graph); + + dropedges = ctrl->dropedges; + + cgraph = graph->coarser; + cwhere = cgraph->where; cbndptr = cgraph->bndptr; - nvtxs = graph->nvtxs; - cmap = graph->cmap; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - adjwgtsum = graph->adjwgtsum; + nvtxs = graph->nvtxs; + cmap = graph->cmap; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; - Allocate2WayPartitionMemory(ctrl, graph); + where = graph->where; + id = graph->id; + ed = graph->ed; - where = graph->where; - id = idxset(nvtxs, 0, graph->id); - ed = idxset(nvtxs, 0, graph->ed); - bndptr = idxset(nvtxs, -1, graph->bndptr); + bndptr = iset(nvtxs, -1, graph->bndptr); bndind = graph->bndind; - - /* Go through and project partition and compute id/ed for the nodes */ + /* Project the partition and record which of these nodes came from the + coarser boundary */ for (i=0; i 0 || xadj[i] == xadj[i+1]) { - bndptr[i] = nbnd; - bndind[nbnd++] = i; - } + else { /* Potentially an interface node */ + me = where[i]; + for (j=istart; j 0 
|| istart == iend) + BNDInsert(nbnd, bndind, bndptr, i); } + graph->mincut = (dropedges ? ComputeCut(graph, where) : cgraph->mincut); + graph->nbnd = nbnd; - graph->mincut = cgraph->mincut; - graph->nbnd = nbnd; - idxcopy(2, cgraph->pwgts, graph->pwgts); + /* copy pwgts */ + icopy(2*graph->ncon, cgraph->pwgts, graph->pwgts); - FreeGraph(graph->coarser); + FreeGraph(&graph->coarser); graph->coarser = NULL; - } diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/rename.h b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/rename.h index ca07dfe4..a282639f 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/rename.h +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/rename.h @@ -8,417 +8,261 @@ * Started 10/2/97 * George * - * $Id: rename.h,v 1.2 2003/07/24 18:39:12 karypis Exp $ + * $Id: rename.h 20398 2016-11-22 17:17:12Z karypis $ * */ -/* balance.c */ -#define Balance2Way __Balance2Way -#define Bnd2WayBalance __Bnd2WayBalance -#define General2WayBalance __General2WayBalance - -/* bucketsort.c */ -#define BucketSortKeysInc __BucketSortKeysInc +#ifndef _LIBMETIS_RENAME_H_ +#define _LIBMETIS_RENAME_H_ -/* ccgraph.c */ -#define CreateCoarseGraph __CreateCoarseGraph -#define CreateCoarseGraphNoMask __CreateCoarseGraphNoMask -#define CreateCoarseGraph_NVW __CreateCoarseGraph_NVW -#define SetUpCoarseGraph __SetUpCoarseGraph -#define ReAdjustMemory __ReAdjustMemory +/* balance.c */ +#define Balance2Way libmetis__Balance2Way +#define Bnd2WayBalance libmetis__Bnd2WayBalance +#define General2WayBalance libmetis__General2WayBalance +#define McGeneral2WayBalance libmetis__McGeneral2WayBalance +/* bucketsort.c */ +#define BucketSortKeysInc libmetis__BucketSortKeysInc /* checkgraph.c */ -#define CheckGraph __CheckGraph - +#define CheckGraph libmetis__CheckGraph +#define CheckInputGraphWeights libmetis__CheckInputGraphWeights +#define FixGraph libmetis__FixGraph /* coarsen.c */ -#define Coarsen2Way 
__Coarsen2Way - +#define CoarsenGraph libmetis__CoarsenGraph +#define Match_RM libmetis__Match_RM +#define Match_SHEM libmetis__Match_SHEM +#define Match_2Hop libmetis__Match_2Hop +#define Match_2HopAny libmetis__Match_2HopAny +#define Match_2HopAll libmetis__Match_2HopAll +#define Match_JC libmetis__Match_JC +#define PrintCGraphStats libmetis__PrintCGraphStats +#define CreateCoarseGraph libmetis__CreateCoarseGraph +#define SetupCoarseGraph libmetis__SetupCoarseGraph +#define ReAdjustMemory libmetis__ReAdjustMemory /* compress.c */ -#define CompressGraph __CompressGraph -#define PruneGraph __PruneGraph - +#define CompressGraph libmetis__CompressGraph +#define PruneGraph libmetis__PruneGraph + +/* contig.c */ +#define FindPartitionInducedComponents libmetis__FindPartitionInducedComponents +#define IsConnected libmetis__IsConnected +#define IsConnectedSubdomain libmetis__IsConnectedSubdomain +#define FindSepInducedComponents libmetis__FindSepInducedComponents +#define EliminateComponents libmetis__EliminateComponents +#define MoveGroupContigForCut libmetis__MoveGroupContigForCut +#define MoveGroupContigForVol libmetis__MoveGroupContigForVol /* debug.c */ -#define ComputeCut __ComputeCut -#define CheckBnd __CheckBnd -#define CheckBnd2 __CheckBnd2 -#define CheckNodeBnd __CheckNodeBnd -#define CheckRInfo __CheckRInfo -#define CheckNodePartitionParams __CheckNodePartitionParams -#define IsSeparable __IsSeparable - - -/* estmem.c */ -#define EstimateCFraction __EstimateCFraction -#define ComputeCoarseGraphSize __ComputeCoarseGraphSize - +#define ComputeCut libmetis__ComputeCut +#define ComputeVolume libmetis__ComputeVolume +#define ComputeMaxCut libmetis__ComputeMaxCut +#define CheckBnd libmetis__CheckBnd +#define CheckBnd2 libmetis__CheckBnd2 +#define CheckNodeBnd libmetis__CheckNodeBnd +#define CheckRInfo libmetis__CheckRInfo +#define CheckNodePartitionParams libmetis__CheckNodePartitionParams +#define IsSeparable libmetis__IsSeparable +#define 
CheckKWayVolPartitionParams libmetis__CheckKWayVolPartitionParams /* fm.c */ -#define FM_2WayEdgeRefine __FM_2WayEdgeRefine - +#define FM_2WayRefine libmetis__FM_2WayRefine +#define FM_2WayCutRefine libmetis__FM_2WayCutRefine +#define FM_Mc2WayCutRefine libmetis__FM_Mc2WayCutRefine +#define SelectQueue libmetis__SelectQueue +#define Print2WayRefineStats libmetis__Print2WayRefineStats /* fortran.c */ -#define Change2CNumbering __Change2CNumbering -#define Change2FNumbering __Change2FNumbering -#define Change2FNumbering2 __Change2FNumbering2 -#define Change2FNumberingOrder __Change2FNumberingOrder -#define ChangeMesh2CNumbering __ChangeMesh2CNumbering -#define ChangeMesh2FNumbering __ChangeMesh2FNumbering -#define ChangeMesh2FNumbering2 __ChangeMesh2FNumbering2 - +#define Change2CNumbering libmetis__Change2CNumbering +#define Change2FNumbering libmetis__Change2FNumbering +#define Change2FNumbering2 libmetis__Change2FNumbering2 +#define Change2FNumberingOrder libmetis__Change2FNumberingOrder +#define ChangeMesh2CNumbering libmetis__ChangeMesh2CNumbering +#define ChangeMesh2FNumbering libmetis__ChangeMesh2FNumbering +#define ChangeMesh2FNumbering2 libmetis__ChangeMesh2FNumbering2 /* graph.c */ -#define SetUpGraph __SetUpGraph -#define SetUpGraphKway __SetUpGraphKway -#define SetUpGraph2 __SetUpGraph2 -#define VolSetUpGraph __VolSetUpGraph -#define RandomizeGraph __RandomizeGraph -#define IsConnectedSubdomain __IsConnectedSubdomain -#define IsConnected __IsConnected -#define IsConnected2 __IsConnected2 -#define FindComponents __FindComponents - +#define SetupGraph libmetis__SetupGraph +#define SetupGraph_adjrsum libmetis__SetupGraph_adjrsum +#define SetupGraph_tvwgt libmetis__SetupGraph_tvwgt +#define SetupGraph_label libmetis__SetupGraph_label +#define SetupSplitGraph libmetis__SetupSplitGraph +#define CreateGraph libmetis__CreateGraph +#define InitGraph libmetis__InitGraph +#define FreeSData libmetis__FreeSData +#define FreeRData libmetis__FreeRData +#define FreeGraph 
libmetis__FreeGraph +#define graph_WriteToDisk libmetis__graph_WriteToDisk +#define graph_ReadFromDisk libmetis__graph_ReadFromDisk /* initpart.c */ -#define Init2WayPartition __Init2WayPartition -#define InitSeparator __InitSeparator -#define GrowBisection __GrowBisection -#define GrowBisectionNode __GrowBisectionNode -#define RandomBisection __RandomBisection - +#define Init2WayPartition libmetis__Init2WayPartition +#define InitSeparator libmetis__InitSeparator +#define RandomBisection libmetis__RandomBisection +#define GrowBisection libmetis__GrowBisection +#define McRandomBisection libmetis__McRandomBisection +#define McGrowBisection libmetis__McGrowBisection +#define GrowBisectionNode libmetis__GrowBisectionNode /* kmetis.c */ -#define MlevelKWayPartitioning __MlevelKWayPartitioning - - -/* kvmetis.c */ -#define MlevelVolKWayPartitioning __MlevelVolKWayPartitioning - +#define MlevelKWayPartitioning libmetis__MlevelKWayPartitioning +#define InitKWayPartitioning libmetis__InitKWayPartitioning /* kwayfm.c */ -#define Random_KWayEdgeRefine __Random_KWayEdgeRefine -#define Greedy_KWayEdgeRefine __Greedy_KWayEdgeRefine -#define Greedy_KWayEdgeBalance __Greedy_KWayEdgeBalance - +#define Greedy_KWayOptimize libmetis__Greedy_KWayOptimize +#define Greedy_KWayCutOptimize libmetis__Greedy_KWayCutOptimize +#define Greedy_KWayVolOptimize libmetis__Greedy_KWayVolOptimize +#define Greedy_McKWayCutOptimize libmetis__Greedy_McKWayCutOptimize +#define Greedy_McKWayVolOptimize libmetis__Greedy_McKWayVolOptimize +#define IsArticulationNode libmetis__IsArticulationNode +#define KWayVolUpdate libmetis__KWayVolUpdate /* kwayrefine.c */ -#define RefineKWay __RefineKWay -#define AllocateKWayPartitionMemory __AllocateKWayPartitionMemory -#define ComputeKWayPartitionParams __ComputeKWayPartitionParams -#define ProjectKWayPartition __ProjectKWayPartition -#define IsBalanced __IsBalanced -#define ComputeKWayBoundary __ComputeKWayBoundary -#define ComputeKWayBalanceBoundary 
__ComputeKWayBalanceBoundary - - -/* kwayvolfm.c */ -#define Random_KWayVolRefine __Random_KWayVolRefine -#define Random_KWayVolRefineMConn __Random_KWayVolRefineMConn -#define Greedy_KWayVolBalance __Greedy_KWayVolBalance -#define Greedy_KWayVolBalanceMConn __Greedy_KWayVolBalanceMConn -#define KWayVolUpdate __KWayVolUpdate -#define ComputeKWayVolume __ComputeKWayVolume -#define ComputeVolume __ComputeVolume -#define CheckVolKWayPartitionParams __CheckVolKWayPartitionParams -#define ComputeVolSubDomainGraph __ComputeVolSubDomainGraph -#define EliminateVolSubDomainEdges __EliminateVolSubDomainEdges - - -/* kwayvolrefine.c */ -#define RefineVolKWay __RefineVolKWay -#define AllocateVolKWayPartitionMemory __AllocateVolKWayPartitionMemory -#define ComputeVolKWayPartitionParams __ComputeVolKWayPartitionParams -#define ComputeKWayVolGains __ComputeKWayVolGains -#define ProjectVolKWayPartition __ProjectVolKWayPartition -#define ComputeVolKWayBoundary __ComputeVolKWayBoundary -#define ComputeVolKWayBalanceBoundary __ComputeVolKWayBalanceBoundary - - -/* match.c */ -#define Match_RM __Match_RM -#define Match_RM_NVW __Match_RM_NVW -#define Match_HEM __Match_HEM -#define Match_SHEM __Match_SHEM - - -/* mbalance.c */ -#define MocBalance2Way __MocBalance2Way -#define MocGeneral2WayBalance __MocGeneral2WayBalance - - -/* mbalance2.c */ -#define MocBalance2Way2 __MocBalance2Way2 -#define MocGeneral2WayBalance2 __MocGeneral2WayBalance2 -#define SelectQueue3 __SelectQueue3 - - -/* mcoarsen.c */ -#define MCCoarsen2Way __MCCoarsen2Way - - -/* memory.c */ -#define AllocateWorkSpace __AllocateWorkSpace -#define FreeWorkSpace __FreeWorkSpace -#define WspaceAvail __WspaceAvail -#define idxwspacemalloc __idxwspacemalloc -#define idxwspacefree __idxwspacefree -#define fwspacemalloc __fwspacemalloc -#define CreateGraph __CreateGraph -#define InitGraph __InitGraph -#define FreeGraph __FreeGraph - +#define RefineKWay libmetis__RefineKWay +#define AllocateKWayPartitionMemory 
libmetis__AllocateKWayPartitionMemory +#define ComputeKWayPartitionParams libmetis__ComputeKWayPartitionParams +#define ProjectKWayPartition libmetis__ProjectKWayPartition +#define ComputeKWayBoundary libmetis__ComputeKWayBoundary +#define ComputeKWayVolGains libmetis__ComputeKWayVolGains +#define IsBalanced libmetis__IsBalanced + +/* mcutil */ +#define rvecle libmetis__rvecle +#define rvecge libmetis__rvecge +#define rvecsumle libmetis__rvecsumle +#define rvecmaxdiff libmetis__rvecmaxdiff +#define ivecle libmetis__ivecle +#define ivecge libmetis__ivecge +#define ivecaxpylez libmetis__ivecaxpylez +#define ivecaxpygez libmetis__ivecaxpygez +#define BetterVBalance libmetis__BetterVBalance +#define BetterBalance2Way libmetis__BetterBalance2Way +#define BetterBalanceKWay libmetis__BetterBalanceKWay +#define ComputeLoadImbalance libmetis__ComputeLoadImbalance +#define ComputeLoadImbalanceDiff libmetis__ComputeLoadImbalanceDiff +#define ComputeLoadImbalanceDiffVec libmetis__ComputeLoadImbalanceDiffVec +#define ComputeLoadImbalanceVec libmetis__ComputeLoadImbalanceVec /* mesh.c */ -#define TRIDUALMETIS __TRIDUALMETIS -#define TETDUALMETIS __TETDUALMETIS -#define HEXDUALMETIS __HEXDUALMETIS -#define TRINODALMETIS __TRINODALMETIS -#define TETNODALMETIS __TETNODALMETIS -#define HEXNODALMETIS __HEXNODALMETIS - - -/* mfm.c */ -#define MocFM_2WayEdgeRefine __MocFM_2WayEdgeRefine -#define SelectQueue __SelectQueue -#define BetterBalance __BetterBalance -#define Compute2WayHLoadImbalance __Compute2WayHLoadImbalance -#define Compute2WayHLoadImbalanceVec __Compute2WayHLoadImbalanceVec - - -/* mfm2.c */ -#define MocFM_2WayEdgeRefine2 __MocFM_2WayEdgeRefine2 -#define SelectQueue2 __SelectQueue2 -#define IsBetter2wayBalance __IsBetter2wayBalance - +#define CreateGraphDual libmetis__CreateGraphDual +#define FindCommonElements libmetis__FindCommonElements +#define CreateGraphNodal libmetis__CreateGraphNodal +#define FindCommonNodes libmetis__FindCommonNodes +#define CreateMesh 
libmetis__CreateMesh +#define InitMesh libmetis__InitMesh +#define FreeMesh libmetis__FreeMesh + +/* meshpart.c */ +#define InduceRowPartFromColumnPart libmetis__InduceRowPartFromColumnPart + +/* minconn.c */ +#define ComputeSubDomainGraph libmetis__ComputeSubDomainGraph +#define UpdateEdgeSubDomainGraph libmetis__UpdateEdgeSubDomainGraph +#define PrintSubDomainGraph libmetis__PrintSubDomainGraph +#define EliminateSubDomainEdges libmetis__EliminateSubDomainEdges +#define MoveGroupMinConnForCut libmetis__MoveGroupMinConnForCut +#define MoveGroupMinConnForVol libmetis__MoveGroupMinConnForVol /* mincover.c */ -#define MinCover __MinCover -#define MinCover_Augment __MinCover_Augment -#define MinCover_Decompose __MinCover_Decompose -#define MinCover_ColDFS __MinCover_ColDFS -#define MinCover_RowDFS __MinCover_RowDFS - - -/* minitpart.c */ -#define MocInit2WayPartition __MocInit2WayPartition -#define MocGrowBisection __MocGrowBisection -#define MocRandomBisection __MocRandomBisection -#define MocInit2WayBalance __MocInit2WayBalance -#define SelectQueueoneWay __SelectQueueoneWay - - -/* minitpart2.c */ -#define MocInit2WayPartition2 __MocInit2WayPartition2 -#define MocGrowBisection2 __MocGrowBisection2 -#define MocGrowBisectionNew2 __MocGrowBisectionNew2 -#define MocInit2WayBalance2 __MocInit2WayBalance2 -#define SelectQueueOneWay2 __SelectQueueOneWay2 - - -/* mkmetis.c */ -#define MCMlevelKWayPartitioning __MCMlevelKWayPartitioning - - -/* mkwayfmh.c */ -#define MCRandom_KWayEdgeRefineHorizontal __MCRandom_KWayEdgeRefineHorizontal -#define MCGreedy_KWayEdgeBalanceHorizontal __MCGreedy_KWayEdgeBalanceHorizontal -#define AreAllHVwgtsBelow __AreAllHVwgtsBelow -#define AreAllHVwgtsAbove __AreAllHVwgtsAbove -#define ComputeHKWayLoadImbalance __ComputeHKWayLoadImbalance -#define MocIsHBalanced __MocIsHBalanced -#define IsHBalanceBetterFT __IsHBalanceBetterFT -#define IsHBalanceBetterTT __IsHBalanceBetterTT - - -/* mkwayrefine.c */ -#define MocRefineKWayHorizontal 
__MocRefineKWayHorizontal -#define MocAllocateKWayPartitionMemory __MocAllocateKWayPartitionMemory -#define MocComputeKWayPartitionParams __MocComputeKWayPartitionParams -#define MocProjectKWayPartition __MocProjectKWayPartition -#define MocComputeKWayBalanceBoundary __MocComputeKWayBalanceBoundary - - -/* mmatch.c */ -#define MCMatch_RM __MCMatch_RM -#define MCMatch_HEM __MCMatch_HEM -#define MCMatch_SHEM __MCMatch_SHEM -#define MCMatch_SHEBM __MCMatch_SHEBM -#define MCMatch_SBHEM __MCMatch_SBHEM -#define BetterVBalance __BetterVBalance -#define AreAllVwgtsBelowFast __AreAllVwgtsBelowFast - +#define MinCover libmetis__MinCover +#define MinCover_Augment libmetis__MinCover_Augment +#define MinCover_Decompose libmetis__MinCover_Decompose +#define MinCover_ColDFS libmetis__MinCover_ColDFS +#define MinCover_RowDFS libmetis__MinCover_RowDFS /* mmd.c */ -#define genmmd __genmmd -#define mmdelm __mmdelm -#define mmdint __mmdint -#define mmdnum __mmdnum -#define mmdupd __mmdupd - - -/* mpmetis.c */ -#define MCMlevelRecursiveBisection __MCMlevelRecursiveBisection -#define MCHMlevelRecursiveBisection __MCHMlevelRecursiveBisection -#define MCMlevelEdgeBisection __MCMlevelEdgeBisection -#define MCHMlevelEdgeBisection __MCHMlevelEdgeBisection - - -/* mrefine.c */ -#define MocRefine2Way __MocRefine2Way -#define MocAllocate2WayPartitionMemory __MocAllocate2WayPartitionMemory -#define MocCompute2WayPartitionParams __MocCompute2WayPartitionParams -#define MocProject2WayPartition __MocProject2WayPartition - - -/* mrefine2.c */ -#define MocRefine2Way2 __MocRefine2Way2 - - -/* mutil.c */ -#define AreAllVwgtsBelow __AreAllVwgtsBelow -#define AreAnyVwgtsBelow __AreAnyVwgtsBelow -#define AreAllVwgtsAbove __AreAllVwgtsAbove -#define ComputeLoadImbalance __ComputeLoadImbalance -#define AreAllBelow __AreAllBelow - - -/* myqsort.c */ -#define iidxsort __iidxsort -#define iintsort __iintsort -#define ikeysort __ikeysort -#define ikeyvalsort __ikeyvalsort +#define genmmd libmetis__genmmd 
+#define mmdelm libmetis__mmdelm +#define mmdint libmetis__mmdint +#define mmdnum libmetis__mmdnum +#define mmdupd libmetis__mmdupd /* ometis.c */ -#define MlevelNestedDissection __MlevelNestedDissection -#define MlevelNestedDissectionCC __MlevelNestedDissectionCC -#define MlevelNodeBisectionMultiple __MlevelNodeBisectionMultiple -#define MlevelNodeBisection __MlevelNodeBisection -#define SplitGraphOrder __SplitGraphOrder -#define MMDOrder __MMDOrder -#define SplitGraphOrderCC __SplitGraphOrderCC - +#define MlevelNestedDissection libmetis__MlevelNestedDissection +#define MlevelNestedDissectionCC libmetis__MlevelNestedDissectionCC +#define MlevelNodeBisectionMultiple libmetis__MlevelNodeBisectionMultiple +#define MlevelNodeBisectionL2 libmetis__MlevelNodeBisectionL2 +#define MlevelNodeBisectionL1 libmetis__MlevelNodeBisectionL1 +#define SplitGraphOrder libmetis__SplitGraphOrder +#define SplitGraphOrderCC libmetis__SplitGraphOrderCC +#define MMDOrder libmetis__MMDOrder + +/* options.c */ +#define SetupCtrl libmetis__SetupCtrl +#define SetupKWayBalMultipliers libmetis__SetupKWayBalMultipliers +#define Setup2WayBalMultipliers libmetis__Setup2WayBalMultipliers +#define PrintCtrl libmetis__PrintCtrl +#define FreeCtrl libmetis__FreeCtrl +#define CheckParams libmetis__CheckParams /* parmetis.c */ -#define MlevelNestedDissectionP __MlevelNestedDissectionP -#define MCMlevelRecursiveBisection2 __MCMlevelRecursiveBisection2 - +#define MlevelNestedDissectionP libmetis__MlevelNestedDissectionP +#define FM_2WayNodeRefine1SidedP libmetis__FM_2WayNodeRefine1SidedP +#define FM_2WayNodeRefine2SidedP libmetis__FM_2WayNodeRefine2SidedP /* pmetis.c */ -#define MlevelRecursiveBisection __MlevelRecursiveBisection -#define MlevelEdgeBisection __MlevelEdgeBisection -#define SplitGraphPart __SplitGraphPart -#define SetUpSplitGraph __SetUpSplitGraph - - -/* pqueue.c */ -#define PQueueInit __PQueueInit -#define PQueueReset __PQueueReset -#define PQueueFree __PQueueFree -#define PQueueInsert 
__PQueueInsert -#define PQueueDelete __PQueueDelete -#define PQueueUpdate __PQueueUpdate -#define PQueueUpdateUp __PQueueUpdateUp -#define PQueueGetMax __PQueueGetMax -#define PQueueSeeMax __PQueueSeeMax -#define CheckHeap __CheckHeap - +#define MlevelRecursiveBisection libmetis__MlevelRecursiveBisection +#define MultilevelBisect libmetis__MultilevelBisect +#define SplitGraphPart libmetis__SplitGraphPart /* refine.c */ -#define Refine2Way __Refine2Way -#define Allocate2WayPartitionMemory __Allocate2WayPartitionMemory -#define Compute2WayPartitionParams __Compute2WayPartitionParams -#define Project2WayPartition __Project2WayPartition - +#define Refine2Way libmetis__Refine2Way +#define Allocate2WayPartitionMemory libmetis__Allocate2WayPartitionMemory +#define Compute2WayPartitionParams libmetis__Compute2WayPartitionParams +#define Project2WayPartition libmetis__Project2WayPartition /* separator.c */ -#define ConstructSeparator __ConstructSeparator -#define ConstructMinCoverSeparator0 __ConstructMinCoverSeparator0 -#define ConstructMinCoverSeparator __ConstructMinCoverSeparator - +#define ConstructSeparator libmetis__ConstructSeparator +#define ConstructMinCoverSeparator libmetis__ConstructMinCoverSeparator /* sfm.c */ -#define FM_2WayNodeRefine __FM_2WayNodeRefine -#define FM_2WayNodeRefineEqWgt __FM_2WayNodeRefineEqWgt -#define FM_2WayNodeRefine_OneSided __FM_2WayNodeRefine_OneSided -#define FM_2WayNodeBalance __FM_2WayNodeBalance -#define ComputeMaxNodeGain __ComputeMaxNodeGain - +#define FM_2WayNodeRefine2Sided libmetis__FM_2WayNodeRefine2Sided +#define FM_2WayNodeRefine1Sided libmetis__FM_2WayNodeRefine1Sided +#define FM_2WayNodeBalance libmetis__FM_2WayNodeBalance /* srefine.c */ -#define Refine2WayNode __Refine2WayNode -#define Allocate2WayNodePartitionMemory __Allocate2WayNodePartitionMemory -#define Compute2WayNodePartitionParams __Compute2WayNodePartitionParams -#define Project2WayNodePartition __Project2WayNodePartition - +#define Refine2WayNode 
libmetis__Refine2WayNode +#define Allocate2WayNodePartitionMemory libmetis__Allocate2WayNodePartitionMemory +#define Compute2WayNodePartitionParams libmetis__Compute2WayNodePartitionParams +#define Project2WayNodePartition libmetis__Project2WayNodePartition /* stat.c */ -#define ComputePartitionInfo __ComputePartitionInfo -#define ComputePartitionBalance __ComputePartitionBalance -#define ComputeElementBalance __ComputeElementBalance -#define Moc_ComputePartitionBalance __Moc_ComputePartitionBalance - - -/* subdomains.c */ -#define Random_KWayEdgeRefineMConn __Random_KWayEdgeRefineMConn -#define Greedy_KWayEdgeBalanceMConn __Greedy_KWayEdgeBalanceMConn -#define PrintSubDomainGraph __PrintSubDomainGraph -#define ComputeSubDomainGraph __ComputeSubDomainGraph -#define EliminateSubDomainEdges __EliminateSubDomainEdges -#define MoveGroupMConn __MoveGroupMConn -#define EliminateComponents __EliminateComponents -#define MoveGroup __MoveGroup - +#define ComputePartitionInfoBipartite libmetis__ComputePartitionInfoBipartite +#define ComputePartitionBalance libmetis__ComputePartitionBalance +#define ComputeElementBalance libmetis__ComputeElementBalance /* timing.c */ -#define InitTimers __InitTimers -#define PrintTimers __PrintTimers -#define seconds __seconds - +#define InitTimers libmetis__InitTimers +#define PrintTimers libmetis__PrintTimers /* util.c */ -#define errexit __errexit -#define GKfree __GKfree -#ifndef DMALLOC -#define imalloc __imalloc -#define idxmalloc __idxmalloc -#define fmalloc __fmalloc -#define ismalloc __ismalloc -#define idxsmalloc __idxsmalloc -#define GKmalloc __GKmalloc -#endif -#define iset __iset -#define idxset __idxset -#define sset __sset -#define iamax __iamax -#define idxamax __idxamax -#define idxamax_strd __idxamax_strd -#define samax __samax -#define samax2 __samax2 -#define idxamin __idxamin -#define samin __samin -#define idxsum __idxsum -#define idxsum_strd __idxsum_strd -#define idxadd __idxadd -#define charsum __charsum -#define isum 
__isum -#define ssum __ssum -#define ssum_strd __ssum_strd -#define sscale __sscale -#define snorm2 __snorm2 -#define sdot __sdot -#define saxpy __saxpy -#define RandomPermute __RandomPermute -#define ispow2 __ispow2 -#define InitRandom __InitRandom -#define log2Int __log2Int - - +#define iargmax_strd libmetis__iargmax_strd +#define iargmax_nrm libmetis__iargmax_nrm +#define iargmax2_nrm libmetis__iargmax2_nrm +#define rargmax2 libmetis__rargmax2 +#define InitRandom libmetis__InitRandom +#define metis_rcode libmetis__metis_rcode + +/* wspace.c */ +#define AllocateWorkSpace libmetis__AllocateWorkSpace +#define AllocateRefinementWorkSpace libmetis__AllocateRefinementWorkSpace +#define FreeWorkSpace libmetis__FreeWorkSpace +#define wspacemalloc libmetis__wspacemalloc +#define wspacepush libmetis__wspacepush +#define wspacepop libmetis__wspacepop +#define iwspacemalloc libmetis__iwspacemalloc +#define rwspacemalloc libmetis__rwspacemalloc +#define ikvwspacemalloc libmetis__ikvwspacemalloc +#define cnbrpoolReset libmetis__cnbrpoolReset +#define cnbrpoolGetNext libmetis__cnbrpoolGetNext +#define vnbrpoolReset libmetis__vnbrpoolReset +#define vnbrpoolGetNext libmetis__vnbrpoolGetNext +#endif diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/separator.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/separator.c index 94182091..72dae9b6 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/separator.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/separator.c @@ -8,27 +8,29 @@ * Started 8/1/97 * George * - * $Id: separator.c,v 1.1 2003/07/16 15:55:17 karypis Exp $ + * $Id: separator.c 10481 2011-07-05 18:01:23Z karypis $ * */ -#include +#include "metislib.h" /************************************************************************* * This function takes a bisection and constructs a minimum weight vertex * separator out of it. It uses the node-based separator refinement for it. 
**************************************************************************/ -void ConstructSeparator(CtrlType *ctrl, GraphType *graph, float ubfactor) +void ConstructSeparator(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, nvtxs, nbnd; - idxtype *xadj, *where, *bndind; + idx_t i, j, k, nvtxs, nbnd; + idx_t *xadj, *where, *bndind; - nvtxs = graph->nvtxs; - xadj = graph->xadj; - nbnd = graph->nbnd; + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + nbnd = graph->nbnd; bndind = graph->bndind; - where = idxcopy(nvtxs, graph->where, idxwspacemalloc(ctrl, nvtxs)); + where = icopy(nvtxs, graph->where, iwspacemalloc(ctrl, nvtxs)); /* Put the nodes in the boundary into the separator */ for (i=0; irdata, LTERM); + FreeRData(graph); + Allocate2WayNodePartitionMemory(ctrl, graph); - idxcopy(nvtxs, where, graph->where); - idxwspacefree(ctrl, nvtxs); + icopy(nvtxs, where, graph->where); + + WCOREPOP; ASSERT(IsSeparable(graph)); @@ -48,124 +52,11 @@ void ConstructSeparator(CtrlType *ctrl, GraphType *graph, float ubfactor) ASSERT(CheckNodePartitionParams(graph)); - FM_2WayNodeRefine(ctrl, graph, ubfactor, 8); + FM_2WayNodeRefine2Sided(ctrl, graph, 1); + FM_2WayNodeRefine1Sided(ctrl, graph, 4); ASSERT(IsSeparable(graph)); -} - - - -/************************************************************************* -* This function takes a bisection and constructs a minimum weight vertex -* separator out of it. It uses an unweighted minimum-cover algorithm -* followed by node-based separator refinement. 
-**************************************************************************/ -void ConstructMinCoverSeparator0(CtrlType *ctrl, GraphType *graph, float ubfactor) -{ - int i, ii, j, jj, k, l, nvtxs, nbnd, bnvtxs[3], bnedges[2], csize; - idxtype *xadj, *adjncy, *bxadj, *badjncy; - idxtype *where, *bndind, *bndptr, *vmap, *ivmap, *cover; - - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - - nbnd = graph->nbnd; - bndind = graph->bndind; - bndptr = graph->bndptr; - where = graph->where; - - vmap = idxwspacemalloc(ctrl, nvtxs); - ivmap = idxwspacemalloc(ctrl, nbnd); - cover = idxwspacemalloc(ctrl, nbnd); - - if (nbnd > 0) { - /* Go through the boundary and determine the sizes of the bipartite graph */ - bnvtxs[0] = bnvtxs[1] = bnedges[0] = bnedges[1] = 0; - for (i=0; i 0) { - bnvtxs[k]++; - bnedges[k] += xadj[j+1]-xadj[j]; - } - } - - bnvtxs[2] = bnvtxs[0]+bnvtxs[1]; - bnvtxs[1] = bnvtxs[0]; - bnvtxs[0] = 0; - - bxadj = idxmalloc(bnvtxs[2]+1, "ConstructMinCoverSeparator: bxadj"); - badjncy = idxmalloc(bnedges[0]+bnedges[1]+1, "ConstructMinCoverSeparator: badjncy"); - - /* Construct the ivmap and vmap */ - ASSERT(idxset(nvtxs, -1, vmap) == vmap); - for (i=0; i 0) { - vmap[j] = bnvtxs[k]; - ivmap[bnvtxs[k]++] = j; - } - } - /* OK, go through and put the vertices of each part starting from 0 */ - bnvtxs[1] = bnvtxs[0]; - bnvtxs[0] = 0; - bxadj[0] = l = 0; - for (k=0; k<2; k++) { - for (ii=0; iibndptr[jj])); - badjncy[l++] = vmap[jj]; - } - } - bxadj[++bnvtxs[k]] = l; - } - } - } - - ASSERT(l <= bnedges[0]+bnedges[1]); - - MinCover(bxadj, badjncy, bnvtxs[0], bnvtxs[1], cover, &csize); - - IFSET(ctrl->dbglvl, DBG_SEPINFO, - printf("Nvtxs: %6d, [%5d %5d], Cut: %6d, SS: [%6d %6d], Cover: %6d\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, bnvtxs[0], bnvtxs[1]-bnvtxs[0], csize)); - - for (i=0; idbglvl, DBG_SEPINFO, - printf("Nvtxs: %6d, [%5d %5d], Cut: %6d, SS: [%6d %6d], Cover: %6d\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, 
0, 0, 0)); - } - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, graph->nbnd); - idxwspacefree(ctrl, graph->nbnd); - graph->nbnd = nbnd; - - - ASSERT(IsSeparable(graph)); } @@ -175,25 +66,26 @@ void ConstructMinCoverSeparator0(CtrlType *ctrl, GraphType *graph, float ubfacto * separator out of it. It uses an unweighted minimum-cover algorithm * followed by node-based separator refinement. **************************************************************************/ -void ConstructMinCoverSeparator(CtrlType *ctrl, GraphType *graph, float ubfactor) +void ConstructMinCoverSeparator(ctrl_t *ctrl, graph_t *graph) { - int i, ii, j, jj, k, l, nvtxs, nbnd, bnvtxs[3], bnedges[2], csize; - idxtype *xadj, *adjncy, *bxadj, *badjncy; - idxtype *where, *bndind, *bndptr, *vmap, *ivmap, *cover; + idx_t i, ii, j, jj, k, l, nvtxs, nbnd, bnvtxs[3], bnedges[2], csize; + idx_t *xadj, *adjncy, *bxadj, *badjncy; + idx_t *where, *bndind, *bndptr, *vmap, *ivmap, *cover; + WCOREPUSH; - nvtxs = graph->nvtxs; - xadj = graph->xadj; + nvtxs = graph->nvtxs; + xadj = graph->xadj; adjncy = graph->adjncy; - nbnd = graph->nbnd; + nbnd = graph->nbnd; bndind = graph->bndind; bndptr = graph->bndptr; - where = graph->where; + where = graph->where; - vmap = idxwspacemalloc(ctrl, nvtxs); - ivmap = idxwspacemalloc(ctrl, nbnd); - cover = idxwspacemalloc(ctrl, nbnd); + vmap = iwspacemalloc(ctrl, nvtxs); + ivmap = iwspacemalloc(ctrl, nbnd); + cover = iwspacemalloc(ctrl, nbnd); if (nbnd > 0) { /* Go through the boundary and determine the sizes of the bipartite graph */ @@ -211,11 +103,11 @@ void ConstructMinCoverSeparator(CtrlType *ctrl, GraphType *graph, float ubfactor bnvtxs[1] = bnvtxs[0]; bnvtxs[0] = 0; - bxadj = idxmalloc(bnvtxs[2]+1, "ConstructMinCoverSeparator: bxadj"); - badjncy = idxmalloc(bnedges[0]+bnedges[1]+1, "ConstructMinCoverSeparator: badjncy"); + bxadj = iwspacemalloc(ctrl, bnvtxs[2]+1); + badjncy = iwspacemalloc(ctrl, bnedges[0]+bnedges[1]+1); /* Construct the ivmap and vmap */ - 
ASSERT(idxset(nvtxs, -1, vmap) == vmap); + ASSERT(iset(nvtxs, -1, vmap) == vmap); for (i=0; ibndptr[jj])); + ASSERTP(vmap[jj] != -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", jj, vmap[jj], graph->bndptr[jj])); badjncy[l++] = vmap[jj]; } } @@ -250,34 +142,34 @@ void ConstructMinCoverSeparator(CtrlType *ctrl, GraphType *graph, float ubfactor MinCover(bxadj, badjncy, bnvtxs[0], bnvtxs[1], cover, &csize); - IFSET(ctrl->dbglvl, DBG_SEPINFO, - printf("Nvtxs: %6d, [%5d %5d], Cut: %6d, SS: [%6d %6d], Cover: %6d\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, bnvtxs[0], bnvtxs[1]-bnvtxs[0], csize)); + IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, + printf("Nvtxs: %6"PRIDX", [%5"PRIDX" %5"PRIDX"], Cut: %6"PRIDX", SS: [%6"PRIDX" %6"PRIDX"], Cover: %6"PRIDX"\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, bnvtxs[0], bnvtxs[1]-bnvtxs[0], csize)); for (i=0; idbglvl, DBG_SEPINFO, - printf("Nvtxs: %6d, [%5d %5d], Cut: %6d, SS: [%6d %6d], Cover: %6d\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, 0, 0, 0)); + IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, + printf("Nvtxs: %6"PRIDX", [%5"PRIDX" %5"PRIDX"], Cut: %6"PRIDX", SS: [%6"PRIDX" %6"PRIDX"], Cover: %6"PRIDX"\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, (idx_t)0, (idx_t)0, (idx_t)0)); } /* Prepare to refine the vertex separator */ - idxcopy(nvtxs, graph->where, vmap); - GKfree((void **)&graph->rdata, LTERM); + icopy(nvtxs, graph->where, vmap); + + FreeRData(graph); Allocate2WayNodePartitionMemory(ctrl, graph); - idxcopy(nvtxs, vmap, graph->where); - idxwspacefree(ctrl, nvtxs+2*graph->nbnd); + icopy(nvtxs, vmap, graph->where); + + WCOREPOP; Compute2WayNodePartitionParams(ctrl, graph); ASSERT(CheckNodePartitionParams(graph)); - FM_2WayNodeRefine_OneSided(ctrl, graph, ubfactor, 6); + FM_2WayNodeRefine1Sided(ctrl, graph, ctrl->niter); ASSERT(IsSeparable(graph)); } diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/sfm.c 
b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/sfm.c index e68ef1fb..366cf012 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/sfm.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/sfm.c @@ -3,98 +3,98 @@ * * sfm.c * - * This file contains code that implementes an FM-based separator refinement + * This file contains code that implements an FM-based separator refinement * * Started 8/1/97 * George * - * $Id: sfm.c,v 1.2 2003/07/31 06:14:01 karypis Exp $ + * $Id: sfm.c 10874 2011-10-17 23:13:00Z karypis $ * */ -#include +#include "metislib.h" -/************************************************************************* -* This function performs a node-based FM refinement -**************************************************************************/ -void FM_2WayNodeRefine(CtrlType *ctrl, GraphType *graph, float ubfactor, int npasses) +/*************************************************************************/ +/*! This function performs a node-based FM refinement */ +/**************************************************************************/ +void FM_2WayNodeRefine2Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter) { - int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; - idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; - idxtype *mptr, *mind, *moved, *swaps, *perm; - PQueueType parts[2]; - NRInfoType *rinfo; - int higain, oldgain, mincut, initcut, mincutorder; - int pass, to, other, limit; - int badmaxpwgt, mindiff, newdiff; - int u[2], g[2]; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; + idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; + idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; + idx_t *mptr, *mind, *moved, *swaps; + rpq_t *queues[2]; + nrinfo_t *rinfo; + idx_t higain, oldgain, mincut, initcut, mincutorder; + idx_t pass, to, other, limit; + idx_t badmaxpwgt, mindiff, newdiff; + idx_t u[2], g[2]; + real_t mult; + + WCOREPUSH; + + nvtxs = 
graph->nvtxs; + xadj = graph->xadj; adjncy = graph->adjncy; - vwgt = graph->vwgt; + vwgt = graph->vwgt; bndind = graph->bndind; bndptr = graph->bndptr; - where = graph->where; - pwgts = graph->pwgts; - rinfo = graph->nrinfo; - + where = graph->where; + pwgts = graph->pwgts; + rinfo = graph->nrinfo; - i = ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt); - PQueueInit(ctrl, &parts[0], nvtxs, i); - PQueueInit(ctrl, &parts[1], nvtxs, i); + queues[0] = rpqCreate(nvtxs); + queues[1] = rpqCreate(nvtxs); - moved = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - mptr = idxwspacemalloc(ctrl, nvtxs+1); - mind = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + mptr = iwspacemalloc(ctrl, nvtxs+1); + mind = iwspacemalloc(ctrl, 2*nvtxs); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d] Nv-Nb[%6d %6d]. ISep: %6d\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); + mult = 0.5*ctrl->ubfactors[0]; + badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]+pwgts[2])); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions-N2: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); - for (pass=0; passmincut; nbnd = graph->nbnd; - RandomPermute(nbnd, perm, 1); + /* use the swaps array in place of the traditional perm array to save memory */ + irandArrayPermute(nbnd, swaps, nbnd, 1); for (ii=0; iioflags&OFLAG_COMPRESS ? amin(5*nbnd, 400) : amin(2*nbnd, 300)); - - badmaxpwgt = (int)(ubfactor*(pwgts[0]+pwgts[1])/2); + limit = (ctrl->compress ? gk_min(5*nbnd, 400) : gk_min(2*nbnd, 300)); /****************************************************** * Get into the FM loop *******************************************************/ mptr[0] = nmind = 0; - mindiff = abs(pwgts[0]-pwgts[1]); + mindiff = iabs(pwgts[0]-pwgts[1]); to = (pwgts[0] < pwgts[1] ? 
0 : 1); for (nswaps=0; nswaps g[1] ? 0 : (g[0] < g[1] ? 1 : pass%2)); - /* to = (g[0] > g[1] ? 0 : (g[0] < g[1] ? 1 : (pwgts[0] < pwgts[1] ? 0 : 1))); */ if (pwgts[to]+vwgt[u[to]] > badmaxpwgt) to = (to+1)%2; @@ -113,245 +113,28 @@ void FM_2WayNodeRefine(CtrlType *ctrl, GraphType *graph, float ubfactor, int npa other = (to+1)%2; - higain = PQueueGetMax(&parts[to]); + higain = rpqGetTop(queues[to]); if (moved[higain] == -1) /* Delete if it was in the separator originally */ - PQueueDelete(&parts[other], higain, vwgt[higain]-rinfo[higain].edegrees[to]); + rpqDelete(queues[other], higain); ASSERT(bndptr[higain] != -1); - pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); - - newdiff = abs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); - if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) { - mincut = pwgts[2]; - mincutorder = nswaps; - mindiff = newdiff; - } - else { - if (nswaps - mincutorder > limit) { - pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]); - break; /* No further improvement, break out */ - } - } - - BNDDelete(nbnd, bndind, bndptr, higain); - pwgts[to] += vwgt[higain]; - where[higain] = to; - moved[higain] = nswaps; - swaps[nswaps] = higain; - - - /********************************************************** - * Update the degrees of the affected nodes - ***********************************************************/ - for (j=xadj[higain]; jdbglvl, DBG_MOVEINFO, - printf("Moved %6d to %3d, Gain: %5d [%5d] [%4d %4d] \t[%5d %5d %5d]\n", higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], pwgts[0], pwgts[1], pwgts[2])); - - } - - - /**************************************************************** - * Roll back computation - *****************************************************************/ - for (nswaps--; nswaps>mincutorder; nswaps--) { - higain = swaps[nswaps]; - - ASSERT(CheckNodePartitionParams(graph)); - - to = where[higain]; - other = (to+1)%2; - INC_DEC(pwgts[2], pwgts[to], vwgt[higain]); - 
where[higain] = 2; - BNDInsert(nbnd, bndind, bndptr, higain); - - edegrees = rinfo[higain].edegrees; - edegrees[0] = edegrees[1] = 0; - for (j=xadj[higain]; jdbglvl, DBG_REFINE, - printf("\tMinimum sep: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); - - graph->mincut = mincut; - graph->nbnd = nbnd; - - if (mincutorder == -1 || mincut >= initcut) - break; - } - - PQueueFree(ctrl, &parts[0]); - PQueueFree(ctrl, &parts[1]); - - idxwspacefree(ctrl, nvtxs+1); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); -} - - -/************************************************************************* -* This function performs a node-based FM refinement -**************************************************************************/ -void FM_2WayNodeRefineEqWgt(CtrlType *ctrl, GraphType *graph, int npasses) -{ - int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; - idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; - idxtype *mptr, *mind, *moved, *swaps, *perm; - PQueueType parts[2]; - NRInfoType *rinfo; - int higain, oldgain, mincut, initcut, mincutorder; - int pass, to, other, limit; - int mindiff, newdiff; - int u[2], g[2]; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - vwgt = graph->vwgt; - - bndind = graph->bndind; - bndptr = graph->bndptr; - where = graph->where; - pwgts = graph->pwgts; - rinfo = graph->nrinfo; - - - i = ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt); - PQueueInit(ctrl, &parts[0], nvtxs, i); - PQueueInit(ctrl, &parts[1], nvtxs, i); - - moved = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - mptr = idxwspacemalloc(ctrl, nvtxs+1); - mind = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d] Nv-Nb[%6d %6d]. 
ISep: %6d\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); - - for (pass=0; passmincut; - nbnd = graph->nbnd; - - RandomPermute(nbnd, perm, 1); - for (ii=0; iioflags&OFLAG_COMPRESS ? amin(5*nbnd, 400) : amin(2*nbnd, 300)); - - /****************************************************** - * Get into the FM loop - *******************************************************/ - mptr[0] = nmind = 0; - mindiff = abs(pwgts[0]-pwgts[1]); - to = (pwgts[0] < pwgts[1] ? 0 : 1); - for (nswaps=0; nswaps g[1] ? 0 : (g[0] < g[1] ? 1 : pass%2)); - } - } - other = (to+1)%2; - - if ((higain = PQueueGetMax(&parts[to])) == -1) + /* The following check is to ensure we break out if there is a possibility + of over-running the mind array. */ + if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1) break; - if (moved[higain] == -1) /* Delete if it was in the separator originally */ - PQueueDelete(&parts[other], higain, vwgt[higain]-rinfo[higain].edegrees[to]); - - ASSERT(bndptr[higain] != -1); - pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); - newdiff = abs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); + newdiff = iabs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) { mincut = pwgts[2]; mincutorder = nswaps; mindiff = newdiff; } else { - if (nswaps - mincutorder > limit) { + if (nswaps - mincutorder > 2*limit || + (nswaps - mincutorder > limit && pwgts[2] > 1.10*mincut)) { pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]); break; /* No further improvement, break out */ } @@ -373,10 +156,10 @@ void FM_2WayNodeRefineEqWgt(CtrlType *ctrl, GraphType *graph, int npasses) oldgain = vwgt[k]-rinfo[k].edegrees[to]; rinfo[k].edegrees[to] += vwgt[higain]; if (moved[k] == -1 || moved[k] == -(2+other)) - PQueueUpdate(&parts[other], k, oldgain, oldgain-vwgt[higain]); + rpqUpdate(queues[other], k, oldgain-vwgt[higain]); } else if (where[k] == other) { /* This 
vertex is pulled into the separator */ - ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k])); + ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k])); BNDInsert(nbnd, bndind, bndptr, k); mind[nmind++] = k; /* Keep track for rollback */ @@ -393,21 +176,21 @@ void FM_2WayNodeRefineEqWgt(CtrlType *ctrl, GraphType *graph, int npasses) oldgain = vwgt[kk]-rinfo[kk].edegrees[other]; rinfo[kk].edegrees[other] -= vwgt[k]; if (moved[kk] == -1 || moved[kk] == -(2+to)) - PQueueUpdate(&parts[to], kk, oldgain, oldgain+vwgt[k]); + rpqUpdate(queues[to], kk, oldgain+vwgt[k]); } } /* Insert the new vertex into the priority queue. Only one side! */ if (moved[k] == -1) { - PQueueInsert(&parts[to], k, vwgt[k]-edegrees[other]); + rpqInsert(queues[to], k, vwgt[k]-edegrees[other]); moved[k] = -(2+to); } } } mptr[nswaps+1] = nmind; - IFSET(ctrl->dbglvl, DBG_MOVEINFO, - printf("Moved %6d to %3d, Gain: %5d [%5d] [%4d %4d] \t[%5d %5d %5d]\n", higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], pwgts[0], pwgts[1], pwgts[2])); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] [%4"PRIDX" %4"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], pwgts[0], pwgts[1], pwgts[2])); } @@ -453,8 +236,8 @@ void FM_2WayNodeRefineEqWgt(CtrlType *ctrl, GraphType *graph, int npasses) ASSERT(mincut == pwgts[2]); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\tMinimum sep: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); graph->mincut = mincut; graph->nbnd = nbnd; @@ -463,135 +246,135 @@ void FM_2WayNodeRefineEqWgt(CtrlType *ctrl, GraphType *graph, int npasses) break; } - PQueueFree(ctrl, &parts[0]); - PQueueFree(ctrl, &parts[1]); 
+ rpqDestroy(queues[0]); + rpqDestroy(queues[1]); - idxwspacefree(ctrl, nvtxs+1); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); + WCOREPOP; } -/************************************************************************* -* This function performs a node-based FM refinement. This is the -* one-way version -**************************************************************************/ -void FM_2WayNodeRefine_OneSided(CtrlType *ctrl, GraphType *graph, float ubfactor, int npasses) +/*************************************************************************/ +/*! This function performs a node-based FM refinement. + Each refinement iteration is split into two sub-iterations. + In each sub-iteration only moves to one of the left/right partitions + is allowed; hence, it is one-sided. +*/ +/**************************************************************************/ +void FM_2WayNodeRefine1Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter) { - int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; - idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; - idxtype *mptr, *mind, *swaps, *perm, *inqueue; - PQueueType parts; - NRInfoType *rinfo; - int higain, oldgain, mincut, initcut, mincutorder; - int pass, to, other, limit; - int badmaxpwgt, mindiff, newdiff; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; + idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, iend; + idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; + idx_t *mptr, *mind, *swaps; + rpq_t *queue; + nrinfo_t *rinfo; + idx_t higain, mincut, initcut, mincutorder; + idx_t pass, to, other, limit; + idx_t badmaxpwgt, mindiff, newdiff; + real_t mult; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; adjncy = graph->adjncy; - vwgt = graph->vwgt; + vwgt = graph->vwgt; bndind = graph->bndind; bndptr = graph->bndptr; - where = graph->where; - pwgts = graph->pwgts; - rinfo = graph->nrinfo; + where = 
graph->where; + pwgts = graph->pwgts; + rinfo = graph->nrinfo; - PQueueInit(ctrl, &parts, nvtxs, ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt)); + queue = rpqCreate(nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - swaps = idxwspacemalloc(ctrl, nvtxs); - mptr = idxwspacemalloc(ctrl, nvtxs+1); - mind = idxwspacemalloc(ctrl, nvtxs); - inqueue = idxwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + mptr = iwspacemalloc(ctrl, nvtxs+1); + mind = iwspacemalloc(ctrl, 2*nvtxs); - idxset(nvtxs, -1, inqueue); + mult = 0.5*ctrl->ubfactors[0]; + badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]+pwgts[2])); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions-N1: [%6d %6d] Nv-Nb[%6d %6d]. ISep: %6d\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions-N1: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); - to = RandomInRange(2); - for (pass=0; passmincut; nbnd = graph->nbnd; - RandomPermute(nbnd, perm, 1); + /* use the swaps array in place of the traditional perm array to save memory */ + irandArrayPermute(nbnd, swaps, nbnd, 1); for (ii=0; iioflags&OFLAG_COMPRESS ? amin(5*nbnd, 400) : amin(2*nbnd, 300)); - - badmaxpwgt = (int)(ubfactor*(pwgts[0]+pwgts[1])/2); + limit = (ctrl->compress ? gk_min(5*nbnd, 500) : gk_min(3*nbnd, 300)); /****************************************************** * Get into the FM loop *******************************************************/ + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr)); mptr[0] = nmind = 0; - mindiff = abs(pwgts[0]-pwgts[1]); + mindiff = iabs(pwgts[0]-pwgts[1]); for (nswaps=0; nswaps badmaxpwgt) { /* Skip this vertex */ - swaps[nswaps] = -1; - mptr[nswaps+1] = nmind; - if (nswaps - mincutorder > limit) - break; - else - continue; - } + /* The following check is to ensure we break out if there is a possibility + of over-running the mind array. 
*/ + if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1) + break; + + if (pwgts[to]+vwgt[higain] > badmaxpwgt) + break; /* No point going any further. Balance will be bad */ pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); - newdiff = abs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); + newdiff = iabs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) { - mincut = pwgts[2]; + mincut = pwgts[2]; mincutorder = nswaps; - mindiff = newdiff; + mindiff = newdiff; } else { - if (nswaps - mincutorder > limit) { + if (nswaps - mincutorder > 3*limit || + (nswaps - mincutorder > limit && pwgts[2] > 1.10*mincut)) { pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]); break; /* No further improvement, break out */ } } BNDDelete(nbnd, bndind, bndptr, higain); - pwgts[to] += vwgt[higain]; - where[higain] = to; - swaps[nswaps] = higain; + pwgts[to] += vwgt[higain]; + where[higain] = to; + swaps[nswaps] = higain; /********************************************************** * Update the degrees of the affected nodes ***********************************************************/ + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux1Tmr)); for (j=xadj[higain]; jdbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux1Tmr)); - IFSET(ctrl->dbglvl, DBG_MOVEINFO, - printf("Moved %6d to %3d, Gain: %5d [%5d] \t[%5d %5d %5d] [%3d %2d]\n", - higain, to, (vwgt[higain]-rinfo[higain].edegrees[other]), vwgt[higain], pwgts[0], pwgts[1], pwgts[2], nswaps, limit)); - + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"] [%3"PRIDX" %2"PRIDX"]\n", + higain, to, (vwgt[higain]-rinfo[higain].edegrees[other]), vwgt[higain], + pwgts[0], pwgts[1], pwgts[2], nswaps, limit)); } + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr)); 
/**************************************************************** * Roll back computation *****************************************************************/ + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux2Tmr)); for (nswaps--; nswaps>mincutorder; nswaps--) { - if ((higain = swaps[nswaps]) == -1) - continue; /* that was a skipped vertex */ + higain = swaps[nswaps]; ASSERT(CheckNodePartitionParams(graph)); ASSERT(where[higain] == to); @@ -660,49 +442,46 @@ void FM_2WayNodeRefine_OneSided(CtrlType *ctrl, GraphType *graph, float ubfactor where[k] = other; INC_DEC(pwgts[other], pwgts[2], vwgt[k]); BNDDelete(nbnd, bndind, bndptr, k); - for (jj=xadj[k]; jjdbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux2Tmr)); ASSERT(mincut == pwgts[2]); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\tMinimum sep: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); graph->mincut = mincut; - graph->nbnd = nbnd; + graph->nbnd = nbnd; if (pass%2 == 1 && (mincutorder == -1 || mincut >= initcut)) break; } - PQueueFree(ctrl, &parts); + rpqDestroy(queue); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs+1); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); + WCOREPOP; } -/************************************************************************* -* This function performs a node-based FM refinement -**************************************************************************/ -void FM_2WayNodeBalance(CtrlType *ctrl, GraphType *graph, float ubfactor) +/*************************************************************************/ +/*! 
This function balances the left/right partitions of a separator + tri-section */ +/*************************************************************************/ +void FM_2WayNodeBalance(ctrl_t *ctrl, graph_t *graph) { - int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps; - idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; - idxtype *perm, *moved; - PQueueType parts; - NRInfoType *rinfo; - int higain, oldgain; - int pass, to, other; - int badmaxpwgt; + idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, gain; + idx_t badmaxpwgt, higain, oldgain, pass, to, other; + idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; + idx_t *perm, *moved; + rpq_t *queue; + nrinfo_t *rinfo; + real_t mult; nvtxs = graph->nvtxs; xadj = graph->xadj; @@ -715,30 +494,33 @@ void FM_2WayNodeBalance(CtrlType *ctrl, GraphType *graph, float ubfactor) pwgts = graph->pwgts; rinfo = graph->nrinfo; - /* badmaxpwgt = (pwgts[0]+pwgts[1]+pwgts[2])*ubfactor; */ - badmaxpwgt = (pwgts[0]+pwgts[1])*ubfactor; - if (pwgts[0] <= badmaxpwgt && pwgts[1] <= badmaxpwgt) + mult = 0.5*ctrl->ubfactors[0]; + + badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1])); + if (gk_max(pwgts[0], pwgts[1]) < badmaxpwgt) return; - if (abs(pwgts[0]-pwgts[1]) < 3*(pwgts[0]+pwgts[1]+pwgts[2])/nvtxs) + if (iabs(pwgts[0]-pwgts[1]) < 3*graph->tvwgt[0]/nvtxs) return; - to = (pwgts[0] < pwgts[1] ? 0 : 1); + WCOREPUSH; + + to = (pwgts[0] < pwgts[1] ? 0 : 1); other = (to+1)%2; - PQueueInit(ctrl, &parts, nvtxs, ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt)); + queue = rpqCreate(nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - moved = idxset(nvtxs, -1, idxwspacemalloc(ctrl, nvtxs)); + perm = iwspacemalloc(ctrl, nvtxs); + moved = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d] Nv-Nb[%6d %6d]. 
ISep: %6d [B]\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX" [B]\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); nbnd = graph->nbnd; - RandomPermute(nbnd, perm, 1); + irandArrayPermute(nbnd, perm, nbnd, 1); for (ii=0; ii pwgts[other]) + break; + + /* break if balance is achieved and no +ve or zero gain */ + if (gain < 0 && pwgts[other] < badmaxpwgt) break; -#endif + + /* skip this vertex if it will violate balance on the other side */ + if (pwgts[to]+vwgt[higain] > badmaxpwgt) + continue; ASSERT(bndptr[higain] != -1); - pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); + pwgts[2] -= gain; BNDDelete(nbnd, bndind, bndptr, higain); pwgts[to] += vwgt[higain]; where[higain] = to; - IFSET(ctrl->dbglvl, DBG_MOVEINFO, - printf("Moved %6d to %3d, Gain: %3d, \t[%5d %5d %5d]\n", higain, to, vwgt[higain]-rinfo[higain].edegrees[other], pwgts[0], pwgts[1], pwgts[2])); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %3"PRIDX", \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", higain, to, vwgt[higain]-rinfo[higain].edegrees[other], pwgts[0], pwgts[1], pwgts[2])); /********************************************************** @@ -781,7 +571,7 @@ void FM_2WayNodeBalance(CtrlType *ctrl, GraphType *graph, float ubfactor) rinfo[k].edegrees[to] += vwgt[higain]; } else if (where[k] == other) { /* This vertex is pulled into the separator */ - ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k])); + ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k])); BNDInsert(nbnd, bndind, bndptr, k); where[k] = 2; @@ -799,53 +589,24 @@ void FM_2WayNodeBalance(CtrlType *ctrl, GraphType *graph, float ubfactor) rinfo[kk].edegrees[other] -= vwgt[k]; if (moved[kk] == -1) - PQueueUpdateUp(&parts, kk, oldgain, oldgain+vwgt[k]); + rpqUpdate(queue, kk, oldgain+vwgt[k]); } 
} /* Insert the new vertex into the priority queue */ - PQueueInsert(&parts, k, vwgt[k]-edegrees[other]); + rpqInsert(queue, k, vwgt[k]-edegrees[other]); } } - - if (pwgts[to] > pwgts[other]) - break; } - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\tBalanced sep: %6d at %4d, PWGTS: [%6d %6d], NBND: %6d\n", pwgts[2], nswaps, pwgts[0], pwgts[1], nbnd)); + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tBalanced sep: %6"PRIDX" at %4"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", pwgts[2], nswaps, pwgts[0], pwgts[1], nbnd)); graph->mincut = pwgts[2]; - graph->nbnd = nbnd; - + graph->nbnd = nbnd; - PQueueFree(ctrl, &parts); - - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - -} - - -/************************************************************************* -* This function computes the maximum possible gain for a vertex -**************************************************************************/ -int ComputeMaxNodeGain(int nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt) -{ - int i, j, k, max; - - max = 0; - for (j=xadj[0]; j +#include "metislib.h" -/************************************************************************* -* This function is the entry point of the separator refinement -**************************************************************************/ -void Refine2WayNode(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, float ubfactor) +/*************************************************************************/ +/*! This function is the entry point of the separator refinement. + It does not perform any refinement on graph, but it starts by first + projecting it to the next level finer graph and proceeds from there. 
*/ +/*************************************************************************/ +void Refine2WayNode(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph) { - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); - - for (;;) { - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RefTmr)); - if (ctrl->RType != 15) - FM_2WayNodeBalance(ctrl, graph, ubfactor); - - switch (ctrl->RType) { - case 1: - FM_2WayNodeRefine(ctrl, graph, ubfactor, 8); - break; - case 2: - FM_2WayNodeRefine_OneSided(ctrl, graph, ubfactor, 8); - break; - case 3: - FM_2WayNodeRefine(ctrl, graph, ubfactor, 8); - FM_2WayNodeRefine_OneSided(ctrl, graph, ubfactor, 8); - break; - case 4: - FM_2WayNodeRefine_OneSided(ctrl, graph, ubfactor, 8); - FM_2WayNodeRefine(ctrl, graph, ubfactor, 8); - break; - case 5: - FM_2WayNodeRefineEqWgt(ctrl, graph, 8); - break; - } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->RefTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->UncoarsenTmr)); - if (graph == orggraph) - break; + if (graph == orggraph) { + Compute2WayNodePartitionParams(ctrl, graph); + } + else { + do { + graph = graph->finer; + + graph_ReadFromDisk(ctrl, graph); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ProjectTmr)); + Project2WayNodePartition(ctrl, graph); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ProjectTmr)); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->RefTmr)); + FM_2WayNodeBalance(ctrl, graph); + + ASSERT(CheckNodePartitionParams(graph)); + + switch (ctrl->rtype) { + case METIS_RTYPE_SEP2SIDED: + FM_2WayNodeRefine2Sided(ctrl, graph, ctrl->niter); + break; + case METIS_RTYPE_SEP1SIDED: + FM_2WayNodeRefine1Sided(ctrl, graph, ctrl->niter); + break; + default: + gk_errexit(SIGERR, "Unknown rtype of %d\n", ctrl->rtype); + } + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->RefTmr)); - graph = graph->finer; - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); - Project2WayNodePartition(ctrl, graph); - 
IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr)); + } while (graph != orggraph); } - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->UncoarsenTmr)); } -/************************************************************************* -* This function allocates memory for 2-way edge refinement -**************************************************************************/ -void Allocate2WayNodePartitionMemory(CtrlType *ctrl, GraphType *graph) +/*************************************************************************/ +/*! This function allocates memory for 2-way node-based refinement */ +/**************************************************************************/ +void Allocate2WayNodePartitionMemory(ctrl_t *ctrl, graph_t *graph) { - int nvtxs, pad64; + idx_t nvtxs; nvtxs = graph->nvtxs; - pad64 = (3*nvtxs+3)%2; - - graph->rdata = idxmalloc(3*nvtxs+3+(sizeof(NRInfoType)/sizeof(idxtype))*nvtxs+pad64, "Allocate2WayPartitionMemory: rdata"); - graph->pwgts = graph->rdata; - graph->where = graph->rdata + 3; - graph->bndptr = graph->rdata + nvtxs + 3; - graph->bndind = graph->rdata + 2*nvtxs + 3; - graph->nrinfo = (NRInfoType *)(graph->rdata + 3*nvtxs + 3 + pad64); + graph->pwgts = imalloc(3, "Allocate2WayNodePartitionMemory: pwgts"); + graph->where = imalloc(nvtxs, "Allocate2WayNodePartitionMemory: where"); + graph->bndptr = imalloc(nvtxs, "Allocate2WayNodePartitionMemory: bndptr"); + graph->bndind = imalloc(nvtxs, "Allocate2WayNodePartitionMemory: bndind"); + graph->nrinfo = (nrinfo_t *)gk_malloc(nvtxs*sizeof(nrinfo_t), "Allocate2WayNodePartitionMemory: nrinfo"); } - -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void Compute2WayNodePartitionParams(CtrlType *ctrl, GraphType *graph) 
+/*************************************************************************/ +/*! This function computes the edegrees[] to the left & right sides */ +/*************************************************************************/ +void Compute2WayNodePartitionParams(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, l, nvtxs, nbnd; - idxtype *xadj, *adjncy, *adjwgt, *vwgt; - idxtype *where, *pwgts, *bndind, *bndptr, *edegrees; - NRInfoType *rinfo; - int me, other; + idx_t i, j, nvtxs, nbnd; + idx_t *xadj, *adjncy, *vwgt; + idx_t *where, *pwgts, *bndind, *bndptr, *edegrees; + nrinfo_t *rinfo; + idx_t me, other; nvtxs = graph->nvtxs; xadj = graph->xadj; vwgt = graph->vwgt; adjncy = graph->adjncy; - adjwgt = graph->adjwgt; where = graph->where; rinfo = graph->nrinfo; - pwgts = idxset(3, 0, graph->pwgts); + pwgts = iset(3, 0, graph->pwgts); bndind = graph->bndind; - bndptr = idxset(nvtxs, -1, graph->bndptr); + bndptr = iset(nvtxs, -1, graph->bndptr); /*------------------------------------------------------------ @@ -134,24 +129,24 @@ void Compute2WayNodePartitionParams(CtrlType *ctrl, GraphType *graph) ASSERT(CheckNodeBnd(graph, nbnd)); graph->mincut = pwgts[2]; - graph->nbnd = nbnd; + graph->nbnd = nbnd; } -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void Project2WayNodePartition(CtrlType *ctrl, GraphType *graph) +/*************************************************************************/ +/*! 
This function projects the node-based bisection */ +/*************************************************************************/ +void Project2WayNodePartition(ctrl_t *ctrl, graph_t *graph) { - int i, j, nvtxs; - idxtype *cmap, *where, *cwhere; - GraphType *cgraph; + idx_t i, j, nvtxs; + idx_t *cmap, *where, *cwhere; + graph_t *cgraph; cgraph = graph->coarser; cwhere = cgraph->where; nvtxs = graph->nvtxs; - cmap = graph->cmap; + cmap = graph->cmap; Allocate2WayNodePartitionMemory(ctrl, graph); where = graph->where; @@ -159,10 +154,11 @@ void Project2WayNodePartition(CtrlType *ctrl, GraphType *graph) /* Project the partition */ for (i=0; i= 0 && where[i] <= 2, ("%d %d %d %d\n", i, cmap[i], where[i], cwhere[cmap[i]])); + ASSERTP(where[i] >= 0 && where[i] <= 2, ("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", + i, cmap[i], where[i], cwhere[cmap[i]])); } - FreeGraph(graph->coarser); + FreeGraph(&graph->coarser); graph->coarser = NULL; Compute2WayNodePartitionParams(ctrl, graph); diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stat.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stat.c index 5a8002db..68600943 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stat.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stat.c @@ -8,130 +8,21 @@ * Started 7/25/97 * George * - * $Id: stat.c,v 1.2 2003/07/24 18:39:12 karypis Exp $ + * $Id: stat.c 17513 2014-08-05 16:20:50Z dominique $ * */ -#include +#include "metislib.h" /************************************************************************* * This function computes cuts and balance information **************************************************************************/ -void ComputePartitionInfo(GraphType *graph, int nparts, idxtype *where) +void ComputePartitionInfoBipartite(graph_t *graph, idx_t nparts, idx_t *where) { - int i, j, k, nvtxs, ncon, mustfree=0; - idxtype *xadj, *adjncy, *vwgt, *adjwgt, *kpwgts, *tmpptr; - idxtype *padjncy, *padjwgt, 
*padjcut; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - adjncy = graph->adjncy; - vwgt = graph->vwgt; - adjwgt = graph->adjwgt; - - if (vwgt == NULL) { - vwgt = graph->vwgt = idxsmalloc(nvtxs, 1, "vwgt"); - mustfree = 1; - } - if (adjwgt == NULL) { - adjwgt = graph->adjwgt = idxsmalloc(xadj[nvtxs], 1, "adjwgt"); - mustfree += 2; - } - - printf("%d-way Cut: %5d, Vol: %5d, ", nparts, ComputeCut(graph, where), ComputeVolume(graph, where)); - - /* Compute balance information */ - kpwgts = idxsmalloc(ncon*nparts, 0, "ComputePartitionInfo: kpwgts"); - - for (i=0; iwhere; - graph->where = where; - for (i=0; iwhere = tmpptr; - - if (mustfree == 1 || mustfree == 3) { - GKfree((void **)&vwgt, LTERM); - graph->vwgt = NULL; - } - if (mustfree == 2 || mustfree == 3) { - GKfree((void **)&adjwgt, LTERM); - graph->adjwgt = NULL; - } - - GKfree((void **)&kpwgts, &padjncy, &padjwgt, &padjcut, LTERM); -} - - -/************************************************************************* -* This function computes cuts and balance information -**************************************************************************/ -void ComputePartitionInfoBipartite(GraphType *graph, int nparts, idxtype *where) -{ - int i, j, k, nvtxs, ncon, mustfree=0; - idxtype *xadj, *adjncy, *vwgt, *vsize, *adjwgt, *kpwgts, *tmpptr; - idxtype *padjncy, *padjwgt, *padjcut; + idx_t i, j, k, nvtxs, ncon, mustfree=0; + idx_t *xadj, *adjncy, *vwgt, *vsize, *adjwgt, *kpwgts, *tmpptr; + idx_t *padjncy, *padjwgt, *padjcut; nvtxs = graph->nvtxs; ncon = graph->ncon; @@ -142,18 +33,18 @@ void ComputePartitionInfoBipartite(GraphType *graph, int nparts, idxtype *where) adjwgt = graph->adjwgt; if (vwgt == NULL) { - vwgt = graph->vwgt = idxsmalloc(nvtxs, 1, "vwgt"); + vwgt = graph->vwgt = ismalloc(nvtxs, 1, "vwgt"); mustfree = 1; } if (adjwgt == NULL) { - adjwgt = graph->adjwgt = idxsmalloc(xadj[nvtxs], 1, "adjwgt"); + adjwgt = graph->adjwgt = ismalloc(xadj[nvtxs], 1, "adjwgt"); mustfree += 2; } - 
printf("%d-way Cut: %5d, Vol: %5d, ", nparts, ComputeCut(graph, where), ComputeVolume(graph, where)); + printf("%"PRIDX"-way Cut: %5"PRIDX", Vol: %5"PRIDX", ", nparts, ComputeCut(graph, where), ComputeVolume(graph, where)); /* Compute balance information */ - kpwgts = idxsmalloc(ncon*nparts, 0, "ComputePartitionInfo: kpwgts"); + kpwgts = ismalloc(ncon*nparts, 0, "ComputePartitionInfo: kpwgts"); for (i=0; ivwgt = NULL; } if (mustfree == 2 || mustfree == 3) { - GKfree((void **)&adjwgt, LTERM); + gk_free((void **)&adjwgt, LTERM); graph->adjwgt = NULL; } - GKfree((void **)&kpwgts, &padjncy, &padjwgt, &padjcut, LTERM); + gk_free((void **)&kpwgts, &padjncy, &padjwgt, &padjcut, LTERM); } - /************************************************************************* * This function computes the balance of the partitioning **************************************************************************/ -void ComputePartitionBalance(GraphType *graph, int nparts, idxtype *where, float *ubvec) +void ComputePartitionBalance(graph_t *graph, idx_t nparts, idx_t *where, real_t *ubvec) { - int i, j, nvtxs, ncon; - idxtype *kpwgts, *vwgt; - float balance; + idx_t i, j, nvtxs, ncon; + idx_t *kpwgts, *vwgt; + real_t balance; nvtxs = graph->nvtxs; ncon = graph->ncon; vwgt = graph->vwgt; - kpwgts = idxsmalloc(nparts, 0, "ComputePartitionInfo: kpwgts"); + kpwgts = ismalloc(nparts, 0, "ComputePartitionInfo: kpwgts"); - if (vwgt == NULL && ncon == 1) { + if (vwgt == NULL) { for (i=0; invtxs; i++) kpwgts[where[i]] += vwgt[i*ncon+j]; - ubvec[j] = 1.0*nparts*kpwgts[idxamax(nparts, kpwgts)]/(1.0*idxsum(nparts, kpwgts)); + ubvec[j] = 1.0*nparts*kpwgts[iargmax(nparts, kpwgts,1)]/(1.0*isum(nparts, kpwgts, 1)); } } - GKfree((void **)&kpwgts, LTERM); + + gk_free((void **)&kpwgts, LTERM); + } /************************************************************************* * This function computes the balance of the element partitioning **************************************************************************/ 
-float ComputeElementBalance(int ne, int nparts, idxtype *where) +real_t ComputeElementBalance(idx_t ne, idx_t nparts, idx_t *where) { - int i; - idxtype *kpwgts; - float balance; + idx_t i; + idx_t *kpwgts; + real_t balance; - kpwgts = idxsmalloc(nparts, 0, "ComputeElementBalance: kpwgts"); + kpwgts = ismalloc(nparts, 0, "ComputeElementBalance: kpwgts"); for (i=0; invtxs; - ncon = graph->ncon; - nvwgt = graph->nvwgt; - kpwgts = fmalloc(nparts, "ComputePartitionInfo: kpwgts"); - - for (j=0; jnvtxs; i++) - kpwgts[where[i]] += nvwgt[i*ncon+j]; - - ubvec[j] = (float)nparts*kpwgts[samax(nparts, kpwgts)]/ssum(nparts, kpwgts); - } - - GKfree((void **)&kpwgts, LTERM); } + diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stats.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stats.c deleted file mode 100644 index 4f6b5484..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stats.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * stat.c - * - * This file computes various statistics - * - * Started 7/25/97 - * George - * - * $Id: stats.c,v 1.1 2003/03/13 06:33:20 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function computes the balance of the partitioning -**************************************************************************/ -void Moc_ComputePartitionBalance(GraphType *graph, int nparts, idxtype *where, float *ubvec) -{ - int i, j, nvtxs, ncon; - float *kpwgts, *nvwgt; - float balance; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - nvwgt = graph->nvwgt; - - kpwgts = fmalloc(nparts, "ComputePartitionInfo: kpwgts"); - - for (j=0; jnvtxs; i++) - kpwgts[where[i]] += nvwgt[i*ncon+j]; - - ubvec[j] = (float)nparts*kpwgts[samax(nparts, kpwgts)]/ssum(nparts, kpwgts); - } - - free(kpwgts); - -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stdheaders.h 
b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stdheaders.h index 903d5ad6..148f88d4 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stdheaders.h +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/stdheaders.h @@ -8,9 +8,11 @@ * Started 8/27/94 * George * - * $Id: stdheaders.h,v 1.2 2003/07/25 14:31:45 karypis Exp $ + * $Id: stdheaders.h 5993 2009-01-07 02:09:57Z karypis $ */ +#ifndef _LIBMETIS_STDHEADERS_H_ +#define _LIBMETIS_STDHEADERS_H_ #include #ifdef __STDC__ @@ -23,5 +25,5 @@ #include #include #include -#include +#endif diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/struct.h b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/struct.h index ff091c6c..6e0381de 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/struct.h +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/struct.h @@ -8,246 +8,213 @@ * Started 9/26/95 * George * - * $Id: struct.h,v 1.2 2003/07/25 13:52:01 karypis Exp $ + * $Id: struct.h 14362 2013-05-21 21:35:23Z karypis $ */ -#ifndef __parmetis_h__ -/* Undefine the following #define in order to use short int as the idxtype */ -#define IDXTYPE_INT - -/* Indexes are as long as integers for now */ -#ifdef IDXTYPE_INT -typedef int idxtype; -#else -typedef short idxtype; -#endif -#endif - -#define MAXIDX (1<<8*sizeof(idxtype)-2) - - -/************************************************************************* -* The following data structure stores key-value pair -**************************************************************************/ -struct KeyValueType { - idxtype key; - idxtype val; -}; - -typedef struct KeyValueType KeyValueType; - - -/************************************************************************* -* The following data structure will hold a node of a doubly-linked list. 
-**************************************************************************/ -struct ListNodeType { - int id; /* The id value of the node */ - struct ListNodeType *prev, *next; /* It's a doubly-linked list */ -}; - -typedef struct ListNodeType ListNodeType; - - - -/************************************************************************* -* The following data structure is used to store the buckets for the -* refinment algorithms -**************************************************************************/ -struct PQueueType { - int type; /* The type of the representation used */ - int nnodes; - int maxnodes; - int mustfree; - - /* Linear array version of the data structures */ - int pgainspan, ngainspan; /* plus and negative gain span */ - int maxgain; - ListNodeType *nodes; - ListNodeType **buckets; - - /* Heap version of the data structure */ - KeyValueType *heap; - idxtype *locator; -}; - -typedef struct PQueueType PQueueType; - - -/************************************************************************* -* The following data structure stores an edge -**************************************************************************/ -struct edegreedef { - idxtype pid; - idxtype ed; -}; -typedef struct edegreedef EDegreeType; - - -/************************************************************************* -* The following data structure stores an edge for vol -**************************************************************************/ -struct vedegreedef { - idxtype pid; - idxtype ed, ned; - idxtype gv; -}; -typedef struct vedegreedef VEDegreeType; - - -/************************************************************************* -* This data structure holds various working space data -**************************************************************************/ -struct workspacedef { - idxtype *core; /* Where pairs, indices, and degrees are coming from */ - int maxcore, ccore; - - EDegreeType *edegrees; - VEDegreeType *vedegrees; - int cdegree; - - idxtype *auxcore; /* This 
points to the memory of the edegrees */ - - idxtype *pmat; /* An array of k^2 used for eliminating domain - connectivity in k-way refinement */ -}; - -typedef struct workspacedef WorkSpaceType; - - -/************************************************************************* -* The following data structure holds information on degrees for k-way -* partition -**************************************************************************/ -struct rinfodef { - int id, ed; /* ID/ED of nodes */ - int ndegrees; /* The number of different ext-degrees */ - EDegreeType *edegrees; /* List of edges */ -}; - -typedef struct rinfodef RInfoType; - - -/************************************************************************* -* The following data structure holds information on degrees for k-way -* vol-based partition -**************************************************************************/ -struct vrinfodef { - int id, ed, nid; /* ID/ED of nodes */ - int gv; /* IV/EV of nodes */ - int ndegrees; /* The number of different ext-degrees */ - VEDegreeType *edegrees; /* List of edges */ -}; - -typedef struct vrinfodef VRInfoType; - - -/************************************************************************* -* The following data structure holds information on degrees for k-way -* partition -**************************************************************************/ -struct nrinfodef { - idxtype edegrees[2]; -}; - -typedef struct nrinfodef NRInfoType; - - -/************************************************************************* -* This data structure holds the input graph -**************************************************************************/ -struct graphdef { - idxtype *gdata, *rdata; /* Memory pools for graph and refinement data. 
- This is where memory is allocated and used - the rest of the fields in this structure */ - - int nvtxs, nedges; /* The # of vertices and edges in the graph */ - idxtype *xadj; /* Pointers to the locally stored vertices */ - idxtype *vwgt; /* Vertex weights */ - idxtype *vsize; /* Vertex sizes for min-volume formulation */ - idxtype *adjncy; /* Array that stores the adjacency lists of nvtxs */ - idxtype *adjwgt; /* Array that stores the weights of the adjacency lists */ - - idxtype *adjwgtsum; /* The sum of the adjacency weight of each vertex */ - - idxtype *label; - - idxtype *cmap; +#ifndef _LIBMETIS_STRUCT_H_ +#define _LIBMETIS_STRUCT_H_ + + + +/*************************************************************************/ +/*! This data structure stores cut-based k-way refinement info about an + adjacent subdomain for a given vertex. */ +/*************************************************************************/ +typedef struct cnbr_t { + idx_t pid; /*!< The partition ID */ + idx_t ed; /*!< The sum of the weights of the adjacent edges + that are incident on pid */ +} cnbr_t; + + +/*************************************************************************/ +/*! The following data structure stores holds information on degrees for k-way + partition */ +/*************************************************************************/ +typedef struct ckrinfo_t { + idx_t id; /*!< The internal degree of a vertex (sum of weights) */ + idx_t ed; /*!< The total external degree of a vertex */ + idx_t nnbrs; /*!< The number of neighboring subdomains */ + idx_t inbr; /*!< The index in the cnbr_t array where the nnbrs list + of neighbors is stored */ +} ckrinfo_t; + + +/*************************************************************************/ +/*! This data structure stores volume-based k-way refinement info about an + adjacent subdomain for a given vertex. 
*/ +/*************************************************************************/ +typedef struct vnbr_t { + idx_t pid; /*!< The partition ID */ + idx_t ned; /*!< The number of the adjacent edges + that are incident on pid */ + idx_t gv; /*!< The gain in volume achieved by moving the + vertex to pid */ +} vnbr_t; + + +/*************************************************************************/ +/*! The following data structure holds information on degrees for k-way + vol-based partition */ +/*************************************************************************/ +typedef struct vkrinfo_t { + idx_t nid; /*!< The internal degree of a vertex (count of edges) */ + idx_t ned; /*!< The total external degree of a vertex (count of edges) */ + idx_t gv; /*!< The volume gain of moving that vertex */ + idx_t nnbrs; /*!< The number of neighboring subdomains */ + idx_t inbr; /*!< The index in the vnbr_t array where the nnbrs list + of neighbors is stored */ +} vkrinfo_t; + + +/*************************************************************************/ +/*! The following data structure holds information on degrees for k-way + partition */ +/*************************************************************************/ +typedef struct nrinfo_t { + idx_t edegrees[2]; +} nrinfo_t; + + +/*************************************************************************/ +/*! 
This data structure holds a graph */ +/*************************************************************************/ +typedef struct graph_t { + idx_t nvtxs, nedges; /* The # of vertices and edges in the graph */ + idx_t ncon; /* The # of constrains */ + idx_t *xadj; /* Pointers to the locally stored vertices */ + idx_t *vwgt; /* Vertex weights */ + idx_t *vsize; /* Vertex sizes for min-volume formulation */ + idx_t *adjncy; /* Array that stores the adjacency lists of nvtxs */ + idx_t *adjwgt; /* Array that stores the weights of the adjacency lists */ + + idx_t *tvwgt; /* The sum of the vertex weights in the graph */ + real_t *invtvwgt; /* The inverse of the sum of the vertex weights in the graph */ + + + /* These are to keep track control if the corresponding fields correspond to + application or library memory */ + int free_xadj, free_vwgt, free_vsize, free_adjncy, free_adjwgt; + + idx_t *cmap; /* The contraction/coarsening map */ + + idx_t *label; /* The labels of the vertices for recursive bisection (pmetis/ometis) */ /* Partition parameters */ - int mincut, minvol; - idxtype *where, *pwgts; - int nbnd; - idxtype *bndptr, *bndind; + idx_t mincut, minvol; + idx_t *where, *pwgts; + idx_t nbnd; + idx_t *bndptr, *bndind; /* Bisection refinement parameters */ - idxtype *id, *ed; + idx_t *id, *ed; /* K-way refinement parameters */ - RInfoType *rinfo; - - /* K-way volume refinement parameters */ - VRInfoType *vrinfo; + ckrinfo_t *ckrinfo; /*!< The per-vertex cut-based refinement info */ + vkrinfo_t *vkrinfo; /*!< The per-vertex volume-based refinement info */ /* Node refinement information */ - NRInfoType *nrinfo; - - - /* Additional info needed by the MOC routines */ - int ncon; /* The # of constrains */ - float *nvwgt; /* Normalized vertex weights */ - float *npwgts; /* The normalized partition weights */ - - struct graphdef *coarser, *finer; -}; - -typedef struct graphdef GraphType; - - - -/************************************************************************* -* 
The following data type implements a timer -**************************************************************************/ -typedef double timer; - - -/************************************************************************* -* The following structure stores information used by Metis -**************************************************************************/ -struct controldef { - int CoarsenTo; /* The # of vertices in the coarsest graph */ - int dbglvl; /* Controls the debuging output of the program */ - int CType; /* The type of coarsening */ - int IType; /* The type of initial partitioning */ - int RType; /* The type of refinement */ - int maxvwgt; /* The maximum allowed weight for a vertex */ - float nmaxvwgt; /* The maximum allowed weight for a vertex for each constrain */ - int optype; /* Type of operation */ - int pfactor; /* .1*prunning factor */ - int nseps; /* The number of separators to be found during multiple bisections */ - int oflags; - - WorkSpaceType wspace; /* Work Space Informations */ + nrinfo_t *nrinfo; + + /* various fields for out-of-core processing */ + int gID; + int ondisk; + + /* keep track of the dropped edgewgt */ + idx_t droppedewgt; + + /* the linked-list structure of the sequence of graphs */ + struct graph_t *coarser, *finer; + +} graph_t; + + +/*************************************************************************/ +/*! This data structure holds a mesh */ +/*************************************************************************/ +typedef struct mesh_t { + idx_t ne, nn; /*!< The # of elements and nodes in the mesh */ + idx_t ncon; /*!< The number of element balancing constraints (element weights) */ + + idx_t *eptr, *eind; /*!< The CSR-structure storing the nodes in the elements */ + idx_t *ewgt; /*!< The weights of the elements */ +} mesh_t; + + + +/*************************************************************************/ +/*! 
The following structure stores information used by Metis */ +/*************************************************************************/ +typedef struct ctrl_t { + moptype_et optype; /* Type of operation */ + mobjtype_et objtype; /* Type of refinement objective */ + mdbglvl_et dbglvl; /* Controls the debugging output of the program */ + mctype_et ctype; /* The type of coarsening */ + miptype_et iptype; /* The type of initial partitioning */ + mrtype_et rtype; /* The type of refinement */ + + idx_t CoarsenTo; /* The # of vertices in the coarsest graph */ + idx_t nIparts; /* The number of initial partitions to compute */ + idx_t no2hop; /* Indicates if 2-hop matching will be used */ + idx_t ondisk; /* Indicates out-of-core execution */ + idx_t minconn; /* Indicates if the subdomain connectivity will be minimized */ + idx_t contig; /* Indicates if contiguous partitions are required */ + idx_t nseps; /* The number of separators to be found during multiple bisections */ + idx_t ufactor; /* The user-supplied load imbalance factor */ + idx_t compress; /* If the graph will be compressed prior to ordering */ + idx_t ccorder; /* If connected components will be ordered separately */ + idx_t seed; /* The seed for the random number generator */ + idx_t ncuts; /* The number of different partitionings to compute */ + idx_t niter; /* The number of iterations during each refinement */ + idx_t numflag; /* The user-supplied numflag for the graph */ + idx_t dropedges; /* Indicates if edges will be randomly dropped during coarsening */ + idx_t *maxvwgt; /* The maximum allowed weight for a vertex */ + + idx_t ncon; /*!< The number of balancing constraints */ + idx_t nparts; /*!< The number of partitions */ + + real_t pfactor; /* .1*(user-supplied prunning factor) */ + + real_t *ubfactors; /*!< The per-constraint ubfactors */ + + real_t *tpwgts; /*!< The target partition weights */ + real_t *pijbm; /*!< The nparts*ncon multiplies for the ith partition + and jth constraint for obtaining 
the balance */ + + real_t cfactor; /*!< The achieved compression factor */ /* Various Timers */ - timer TotalTmr, InitPartTmr, MatchTmr, ContractTmr, CoarsenTmr, UncoarsenTmr, - SepTmr, RefTmr, ProjectTmr, SplitTmr, AuxTmr1, AuxTmr2, AuxTmr3, AuxTmr4, AuxTmr5, AuxTmr6; - -}; + double TotalTmr, InitPartTmr, MatchTmr, ContractTmr, CoarsenTmr, UncoarsenTmr, + RefTmr, ProjectTmr, SplitTmr, Aux1Tmr, Aux2Tmr, Aux3Tmr; -typedef struct controldef CtrlType; + /* Workspace information */ + gk_mcore_t *mcore; /*!< The persistent memory core for within function + mallocs/frees */ + /* These are for use by the k-way refinement routines */ + size_t nbrpoolsize_max; /*!< The maximum number of {c,v}nbr_t entries that will ever be allocated */ + size_t nbrpoolsize; /*!< The number of {c,v}nbr_t entries that have been allocated */ + size_t nbrpoolcpos; /*!< The position of the first free entry in the array */ + size_t nbrpoolreallocs; /*!< The number of times the pool was resized */ -/************************************************************************* -* The following data structure stores max-partition weight info for -* Vertical MOC k-way refinement -**************************************************************************/ -struct vpwgtdef { - float max[2][MAXNCON]; - int imax[2][MAXNCON]; -}; + cnbr_t *cnbrpool; /*!< The pool of cnbr_t entries to be used during refinement. + The size and current position of the pool is controlled + by nnbrs & cnbrs */ + vnbr_t *vnbrpool; /*!< The pool of vnbr_t entries to be used during refinement. 
+ The size and current position of the pool is controlled + by nnbrs & cnbrs */ -typedef struct vpwgtdef VPInfoType; + /* The subdomain graph, in sparse format */ + idx_t *maxnads; /* The maximum allocated number of adjacent domains */ + idx_t *nads; /* The number of adjacent domains */ + idx_t **adids; /* The IDs of the adjacent domains */ + idx_t **adwgts; /* The edge-weight to the adjacent domains */ + idx_t *pvec1, *pvec2; /* Auxiliary nparts-size vectors for efficiency */ + /* ondisk related info */ + pid_t pid; /*!< The pid of the running process */ +} ctrl_t; +#endif diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/subdomains.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/subdomains.c deleted file mode 100644 index 739d173e..00000000 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/subdomains.c +++ /dev/null @@ -1,1294 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * subdomains.c - * - * This file contains functions that deal with prunning the number of - * adjacent subdomains in KMETIS - * - * Started 7/15/98 - * George - * - * $Id: subdomains.c,v 1.2 2003/07/31 06:14:01 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Random_KWayEdgeRefineMConn(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor, int npasses, int ffactor) -{ - int i, ii, iii, j, jj, k, l, pass, nvtxs, nmoves, nbnd, tvwgt, myndegrees; - int from, me, to, oldcut, vwgt, gain; - int maxndoms, nadd; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *itpwgts; - idxtype *phtable, *pmat, *pmatptr, *ndoms; - EDegreeType *myedegrees; - RInfoType *myrinfo; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = 
graph->adjwgt; - - bndptr = graph->bndptr; - bndind = graph->bndind; - - where = graph->where; - pwgts = graph->pwgts; - - pmat = ctrl->wspace.pmat; - phtable = idxwspacemalloc(ctrl, nparts); - ndoms = idxwspacemalloc(ctrl, nparts); - - ComputeSubDomainGraph(graph, nparts, pmat, ndoms); - - /* Setup the weight intervals of the various subdomains */ - minwgt = idxwspacemalloc(ctrl, nparts); - maxwgt = idxwspacemalloc(ctrl, nparts); - itpwgts = idxwspacemalloc(ctrl, nparts); - tvwgt = idxsum(nparts, pwgts); - ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt)); - - for (i=0; idbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d]-[%6d %6d], Balance: %5.3f, Nv-Nb[%6d %6d]. Cut: %6d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd, - graph->mincut)); - - for (pass=0; passmincut); - - maxndoms = ndoms[idxamax(nparts, ndoms)]; - - oldcut = graph->mincut; - nbnd = graph->nbnd; - - RandomPermute(nbnd, perm, 1); - for (nmoves=iii=0; iiinbnd; iii++) { - ii = perm[iii]; - if (ii >= nbnd) - continue; - i = bndind[ii]; - - myrinfo = graph->rinfo+i; - - if (myrinfo->ed >= myrinfo->id) { /* Total ED is too high */ - from = where[i]; - vwgt = graph->vwgt[i]; - - if (myrinfo->id > 0 && pwgts[from]-vwgt < minwgt[from]) - continue; /* This cannot be moved! */ - - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - - /* Determine the valid domains */ - for (j=0; j maxndoms-1) { - phtable[to] = 0; - nadd = maxndoms; - break; - } - nadd++; - } - } - if (ndoms[to]+nadd > maxndoms) - phtable[to] = 0; - if (nadd == 0) - phtable[to] = 2; - } - - /* Find the first valid move */ - j = myrinfo->id; - for (k=0; kid. 
Allow good nodes to move */ - if (pwgts[to]+vwgt <= maxwgt[to]+ffactor*gain && gain >= 0) - break; - } - if (k == myndegrees) - continue; /* break out if you did not find a candidate */ - - for (j=k+1; j myedegrees[k].ed && pwgts[to]+vwgt <= maxwgt[to]) || - (myedegrees[j].ed == myedegrees[k].ed && - itpwgts[myedegrees[k].pid]*pwgts[to] < itpwgts[to]*pwgts[myedegrees[k].pid])) - k = j; - } - - to = myedegrees[k].pid; - - j = 0; - if (myedegrees[k].ed-myrinfo->id > 0) - j = 1; - else if (myedegrees[k].ed-myrinfo->id == 0) { - if (/*(iii&7) == 0 ||*/ phtable[myedegrees[k].pid] == 2 || pwgts[from] >= maxwgt[from] || itpwgts[from]*(pwgts[to]+vwgt) < itpwgts[to]*pwgts[from]) - j = 1; - } - if (j == 0) - continue; - - /*===================================================================== - * If we got here, we can now move the vertex from 'from' to 'to' - *======================================================================*/ - graph->mincut -= myedegrees[k].ed-myrinfo->id; - - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. 
Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut)); - - /* Update pmat to reflect the move of 'i' */ - pmat[from*nparts+to] += (myrinfo->id-myedegrees[k].ed); - pmat[to*nparts+from] += (myrinfo->id-myedegrees[k].ed); - if (pmat[from*nparts+to] == 0) { - ndoms[from]--; - if (ndoms[from]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - if (pmat[to*nparts+from] == 0) { - ndoms[to]--; - if (ndoms[to]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - - /* Update where, weight, and ID/ED information of the vertex you moved */ - where[i] = to; - INC_DEC(pwgts[to], pwgts[from], vwgt); - myrinfo->ed += myrinfo->id-myedegrees[k].ed; - SWAP(myrinfo->id, myedegrees[k].ed, j); - if (myedegrees[k].ed == 0) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].pid = from; - - if (myrinfo->ed-myrinfo->id < 0) - BNDDelete(nbnd, bndind, bndptr, i); - - /* Update the degrees of adjacent vertices */ - for (j=xadj[i]; jrinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; - - ASSERT(CheckRInfo(myrinfo)); - - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - - if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1) - BNDInsert(nbnd, bndind, bndptr, ii); - } - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - - if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1) - BNDDelete(nbnd, bndind, bndptr, ii); - } - - /* Remove contribution from the .ed of 'from' */ - if (me != from) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == from) { - if (myedegrees[k].ed == adjwgt[j]) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].ed -= adjwgt[j]; - break; - } - } - } - - /* Add contribution to the .ed of 'to' */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += adjwgt[j]; - 
break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } - } - - /* Update pmat to reflect the move of 'i' for domains other than 'from' and 'to' */ - if (me != from && me != to) { - pmat[me*nparts+from] -= adjwgt[j]; - pmat[from*nparts+me] -= adjwgt[j]; - if (pmat[me*nparts+from] == 0) { - ndoms[me]--; - if (ndoms[me]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - if (pmat[from*nparts+me] == 0) { - ndoms[from]--; - if (ndoms[from]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - - if (pmat[me*nparts+to] == 0) { - ndoms[me]++; - if (ndoms[me] > maxndoms) { - IFSET(ctrl->dbglvl, DBG_REFINE, printf("You just increased the maxndoms: %d %d\n", ndoms[me], maxndoms)); - maxndoms = ndoms[me]; - } - } - if (pmat[to*nparts+me] == 0) { - ndoms[to]++; - if (ndoms[to] > maxndoms) { - IFSET(ctrl->dbglvl, DBG_REFINE, printf("You just increased the maxndoms: %d %d\n", ndoms[to], maxndoms)); - maxndoms = ndoms[to]; - } - } - pmat[me*nparts+to] += adjwgt[j]; - pmat[to*nparts+me] += adjwgt[j]; - } - - ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]); - ASSERT(CheckRInfo(myrinfo)); - - } - nmoves++; - } - } - - graph->nbnd = nbnd; - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. 
Nmoves: %5d, Cut: %5d, Vol: %5d, %d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves, - graph->mincut, ComputeVolume(graph, where), idxsum(nparts, ndoms))); - - if (graph->mincut == oldcut) - break; - } - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); -} - - - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Greedy_KWayEdgeBalanceMConn(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor, int npasses) -{ - int i, ii, iii, j, jj, k, l, pass, nvtxs, nbnd, tvwgt, myndegrees, oldgain, gain, nmoves; - int from, me, to, oldcut, vwgt, maxndoms, nadd; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *moved, *itpwgts; - idxtype *phtable, *pmat, *pmatptr, *ndoms; - EDegreeType *myedegrees; - RInfoType *myrinfo; - PQueueType queue; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - bndind = graph->bndind; - bndptr = graph->bndptr; - - where = graph->where; - pwgts = graph->pwgts; - - pmat = ctrl->wspace.pmat; - phtable = idxwspacemalloc(ctrl, nparts); - ndoms = idxwspacemalloc(ctrl, nparts); - - ComputeSubDomainGraph(graph, nparts, pmat, ndoms); - - - /* Setup the weight intervals of the various subdomains */ - minwgt = idxwspacemalloc(ctrl, nparts); - maxwgt = idxwspacemalloc(ctrl, nparts); - itpwgts = idxwspacemalloc(ctrl, nparts); - tvwgt = idxsum(nparts, pwgts); - ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt)); - - for (i=0; iadjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]); - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("Partitions: [%6d %6d]-[%6d %6d], Balance: %5.3f, Nv-Nb[%6d 
%6d]. Cut: %6d [B]\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd, - graph->mincut)); - - for (pass=0; passmincut); - - /* Check to see if things are out of balance, given the tolerance */ - for (i=0; i maxwgt[i]) - break; - } - if (i == nparts) /* Things are balanced. Return right away */ - break; - - PQueueReset(&queue); - idxset(nvtxs, -1, moved); - - oldcut = graph->mincut; - nbnd = graph->nbnd; - - RandomPermute(nbnd, perm, 1); - for (ii=0; iirinfo[i].ed - graph->rinfo[i].id); - moved[i] = 2; - } - - maxndoms = ndoms[idxamax(nparts, ndoms)]; - - for (nmoves=0;;) { - if ((i = PQueueGetMax(&queue)) == -1) - break; - moved[i] = 1; - - myrinfo = graph->rinfo+i; - from = where[i]; - vwgt = graph->vwgt[i]; - - if (pwgts[from]-vwgt < minwgt[from]) - continue; /* This cannot be moved! */ - - myedegrees = myrinfo->edegrees; - myndegrees = myrinfo->ndegrees; - - /* Determine the valid domains */ - for (j=0; j maxndoms-1) { - phtable[to] = 0; - nadd = maxndoms; - break; - } - nadd++; - } - } - if (ndoms[to]+nadd > maxndoms) - phtable[to] = 0; - } - - for (k=0; k minwgt[to] && myedegrees[k].ed-myrinfo->id < 0) - continue; - - /*===================================================================== - * If we got here, we can now move the vertex from 'from' to 'to' - *======================================================================*/ - graph->mincut -= myedegrees[k].ed-myrinfo->id; - - IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. 
Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut)); - - /* Update pmat to reflect the move of 'i' */ - pmat[from*nparts+to] += (myrinfo->id-myedegrees[k].ed); - pmat[to*nparts+from] += (myrinfo->id-myedegrees[k].ed); - if (pmat[from*nparts+to] == 0) { - ndoms[from]--; - if (ndoms[from]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - if (pmat[to*nparts+from] == 0) { - ndoms[to]--; - if (ndoms[to]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - - - /* Update where, weight, and ID/ED information of the vertex you moved */ - where[i] = to; - INC_DEC(pwgts[to], pwgts[from], vwgt); - myrinfo->ed += myrinfo->id-myedegrees[k].ed; - SWAP(myrinfo->id, myedegrees[k].ed, j); - if (myedegrees[k].ed == 0) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].pid = from; - - if (myrinfo->ed == 0) - BNDDelete(nbnd, bndind, bndptr, i); - - /* Update the degrees of adjacent vertices */ - for (j=xadj[i]; jrinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; - - ASSERT(CheckRInfo(myrinfo)); - - oldgain = (myrinfo->ed-myrinfo->id); - - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - - if (myrinfo->ed > 0 && bndptr[ii] == -1) - BNDInsert(nbnd, bndind, bndptr, ii); - } - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - - if (myrinfo->ed == 0 && bndptr[ii] != -1) - BNDDelete(nbnd, bndind, bndptr, ii); - } - - /* Remove contribution from the .ed of 'from' */ - if (me != from) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == from) { - if (myedegrees[k].ed == adjwgt[j]) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].ed -= adjwgt[j]; - break; - } - } - } - - /* Add contribution to the .ed of 'to' */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += 
adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } - } - - /* Update pmat to reflect the move of 'i' for domains other than 'from' and 'to' */ - if (me != from && me != to) { - pmat[me*nparts+from] -= adjwgt[j]; - pmat[from*nparts+me] -= adjwgt[j]; - if (pmat[me*nparts+from] == 0) { - ndoms[me]--; - if (ndoms[me]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - if (pmat[from*nparts+me] == 0) { - ndoms[from]--; - if (ndoms[from]+1 == maxndoms) - maxndoms = ndoms[idxamax(nparts, ndoms)]; - } - - if (pmat[me*nparts+to] == 0) { - ndoms[me]++; - if (ndoms[me] > maxndoms) { - IFSET(ctrl->dbglvl, DBG_REFINE, printf("You just increased the maxndoms: %d %d\n", ndoms[me], maxndoms)); - maxndoms = ndoms[me]; - } - } - if (pmat[to*nparts+me] == 0) { - ndoms[to]++; - if (ndoms[to] > maxndoms) { - IFSET(ctrl->dbglvl, DBG_REFINE, printf("You just increased the maxndoms: %d %d\n", ndoms[to], maxndoms)); - maxndoms = ndoms[to]; - } - } - pmat[me*nparts+to] += adjwgt[j]; - pmat[to*nparts+me] += adjwgt[j]; - } - - /* Update the queue */ - if (me == to || me == from) { - gain = myrinfo->ed-myrinfo->id; - if (moved[ii] == 2) { - if (myrinfo->ed > 0) - PQueueUpdate(&queue, ii, oldgain, gain); - else { - PQueueDelete(&queue, ii, oldgain); - moved[ii] = -1; - } - } - else if (moved[ii] == -1 && myrinfo->ed > 0) { - PQueueInsert(&queue, ii, gain); - moved[ii] = 2; - } - } - - ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]); - ASSERT(CheckRInfo(myrinfo)); - } - nmoves++; - } - - graph->nbnd = nbnd; - - IFSET(ctrl->dbglvl, DBG_REFINE, - printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. 
Nmoves: %5d, Cut: %6d, %d\n", - pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], - 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves, graph->mincut,idxsum(nparts, ndoms))); - } - - PQueueFree(ctrl, &queue); - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - -} - - - - -/************************************************************************* -* This function computes the subdomain graph -**************************************************************************/ -void PrintSubDomainGraph(GraphType *graph, int nparts, idxtype *where) -{ - int i, j, k, me, nvtxs, total, max; - idxtype *xadj, *adjncy, *adjwgt, *pmat; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - pmat = idxsmalloc(nparts*nparts, 0, "ComputeSubDomainGraph: pmat"); - - for (i=0; i 0) - k++; - } - total += k; - - if (k > max) - max = k; -/* - printf("%2d -> %2d ", i, k); - for (j=0; j 0) - printf("[%2d %4d] ", j, pmat[i*nparts+j]); - } - printf("\n"); -*/ - } - printf("Total adjacent subdomains: %d, Max: %d\n", total, max); - GKfree((void **)&pmat, LTERM); -} - - - -/************************************************************************* -* This function computes the subdomain graph -**************************************************************************/ -void ComputeSubDomainGraph(GraphType *graph, int nparts, idxtype *pmat, idxtype *ndoms) -{ - int i, j, k, me, nvtxs, ndegrees; - idxtype *xadj, *adjncy, *adjwgt, *where; - RInfoType *rinfo; - EDegreeType *edegrees; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - rinfo = graph->rinfo; - - idxset(nparts*nparts, 0, pmat); - - for (i=0; i 0) { - me = where[i]; - ndegrees = rinfo[i].ndegrees; - edegrees = rinfo[i].edegrees; 
- - k = me*nparts; - for (j=0; j 0) - ndoms[i]++; - } - } - -} - - - - - -/************************************************************************* -* This function computes the subdomain graph -**************************************************************************/ -void EliminateSubDomainEdges(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts) -{ - int i, ii, j, k, me, other, nvtxs, total, max, avg, totalout, nind, ncand, ncand2, target, target2, nadd; - int min, move, cpwgt, tvwgt; - idxtype *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *maxpwgt, *pmat, *ndoms, *mypmat, *otherpmat, *ind; - KeyValueType *cand, *cand2; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - vwgt = graph->vwgt; - adjwgt = graph->adjwgt; - - where = graph->where; - pwgts = graph->pwgts; /* We assume that this is properly initialized */ - - maxpwgt = idxwspacemalloc(ctrl, nparts); - ndoms = idxwspacemalloc(ctrl, nparts); - otherpmat = idxwspacemalloc(ctrl, nparts); - ind = idxwspacemalloc(ctrl, nvtxs); - pmat = ctrl->wspace.pmat; - - cand = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand"); - cand2 = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand"); - - /* Compute the pmat matrix and ndoms */ - ComputeSubDomainGraph(graph, nparts, pmat, ndoms); - - - /* Compute the maximum allowed weight for each domain */ - tvwgt = idxsum(nparts, pwgts); - for (i=0; i 0) { - cand2[ncand2].key = mypmat[i]; - cand2[ncand2++].val = i; - } - } - ikeysort(ncand2, cand2); - - move = 0; - for (min=0; min totalout/(2*ndoms[me])) - break; - - other = cand2[min].val; - - /*printf("\tMinOut: %d to %d\n", mypmat[other], other);*/ - - idxset(nparts, 0, otherpmat); - - /* Go and find the vertices in 'other' that are connected in 'me' */ - for (nind=0, i=0; i 0) { - cand[ncand].key = -otherpmat[i]; - cand[ncand++].val = i; - } - } - ikeysort(ncand, cand); - - /* - * Go through and the select the first 
domain that is common with 'me', and - * does not increase the ndoms[target] higher than my ndoms, subject to the - * maxpwgt constraint. Traversal is done from the mostly connected to the least. - */ - target = target2 = -1; - for (i=0; i 0) { - if (pwgts[k] + cpwgt > maxpwgt[k]) /* Check if balance will go off */ - continue; - - for (j=0; j 0 && ndoms[j] >= ndoms[me]-1 && pmat[nparts*j+k] == 0) - break; - } - if (j == nparts) { /* No bad second level effects */ - for (nadd=0, j=0; j 0 && pmat[nparts*k+j] == 0) - nadd++; - } - - /*printf("\t\tto=%d, nadd=%d, %d\n", k, nadd, ndoms[k]);*/ - if (target2 == -1 && ndoms[k]+nadd < ndoms[me]) { - target2 = k; - } - if (nadd == 0) { - target = k; - break; - } - } - } - } - if (target == -1 && target2 != -1) - target = target2; - - if (target == -1) { - /* printf("\t\tCould not make the move\n");*/ - continue; - } - - /*printf("\t\tMoving to %d\n", target);*/ - - /* Update the partition weights */ - INC_DEC(pwgts[target], pwgts[other], cpwgt); - - MoveGroupMConn(ctrl, graph, ndoms, pmat, nparts, target, nind, ind); - - move = 1; - break; - } - - if (move == 0) - break; - } - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); - - GKfree((void **)&cand, &cand2, LTERM); -} - - -/************************************************************************* -* This function moves a collection of vertices and updates their rinfo -**************************************************************************/ -void MoveGroupMConn(CtrlType *ctrl, GraphType *graph, idxtype *ndoms, idxtype *pmat, - int nparts, int to, int nind, idxtype *ind) -{ - int i, ii, iii, j, jj, k, l, nvtxs, nbnd, myndegrees; - int from, me; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *bndptr, *bndind; - EDegreeType *myedegrees; - RInfoType *myrinfo; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - where = graph->where; - bndptr = 
graph->bndptr; - bndind = graph->bndind; - - nbnd = graph->nbnd; - - for (iii=0; iiirinfo+i; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[i+1]-xadj[i]; - myrinfo->ndegrees = 0; - } - myedegrees = myrinfo->edegrees; - - /* find the location of 'to' in myrinfo or create it if it is not there */ - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) - break; - } - if (k == myrinfo->ndegrees) { - myedegrees[k].pid = to; - myedegrees[k].ed = 0; - myrinfo->ndegrees++; - } - - graph->mincut -= myedegrees[k].ed-myrinfo->id; - - /* Update pmat to reflect the move of 'i' */ - pmat[from*nparts+to] += (myrinfo->id-myedegrees[k].ed); - pmat[to*nparts+from] += (myrinfo->id-myedegrees[k].ed); - if (pmat[from*nparts+to] == 0) - ndoms[from]--; - if (pmat[to*nparts+from] == 0) - ndoms[to]--; - - /* Update where, weight, and ID/ED information of the vertex you moved */ - where[i] = to; - myrinfo->ed += myrinfo->id-myedegrees[k].ed; - SWAP(myrinfo->id, myedegrees[k].ed, j); - if (myedegrees[k].ed == 0) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].pid = from; - - if (myrinfo->ed-myrinfo->id < 0 && bndptr[i] != -1) - BNDDelete(nbnd, bndind, bndptr, i); - - /* Update the degrees of adjacent vertices */ - for (j=xadj[i]; jrinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; - - ASSERT(CheckRInfo(myrinfo)); - - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - - if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1) - BNDInsert(nbnd, bndind, bndptr, ii); - } - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - - if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1) - BNDDelete(nbnd, bndind, bndptr, ii); - } - - /* Remove contribution from the .ed of 'from' */ - if (me != from) { - for (k=0; 
kndegrees; k++) { - if (myedegrees[k].pid == from) { - if (myedegrees[k].ed == adjwgt[j]) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].ed -= adjwgt[j]; - break; - } - } - } - - /* Add contribution to the .ed of 'to' */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } - } - - /* Update pmat to reflect the move of 'i' for domains other than 'from' and 'to' */ - if (me != from && me != to) { - pmat[me*nparts+from] -= adjwgt[j]; - pmat[from*nparts+me] -= adjwgt[j]; - if (pmat[me*nparts+from] == 0) - ndoms[me]--; - if (pmat[from*nparts+me] == 0) - ndoms[from]--; - - if (pmat[me*nparts+to] == 0) - ndoms[me]++; - if (pmat[to*nparts+me] == 0) - ndoms[to]++; - - pmat[me*nparts+to] += adjwgt[j]; - pmat[to*nparts+me] += adjwgt[j]; - } - - ASSERT(CheckRInfo(myrinfo)); - } - - ASSERT(CheckRInfo(graph->rinfo+i)); - } - - graph->nbnd = nbnd; - -} - - - - -/************************************************************************* -* This function finds all the connected components induced by the -* partitioning vector in wgraph->where and tries to push them around to -* remove some of them -**************************************************************************/ -void EliminateComponents(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor) -{ - int i, ii, j, jj, k, me, nvtxs, tvwgt, first, last, nleft, ncmps, cwgt, other, target, deltawgt; - idxtype *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts, *maxpwgt; - idxtype *cpvec, *touched, *perm, *todo, *cind, *cptr, *npcmps; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - vwgt = graph->vwgt; - adjwgt = graph->adjwgt; - - where = graph->where; - pwgts = graph->pwgts; - - touched = idxset(nvtxs, 0, idxwspacemalloc(ctrl, nvtxs)); - cptr = 
idxwspacemalloc(ctrl, nvtxs+1); - cind = idxwspacemalloc(ctrl, nvtxs); - perm = idxwspacemalloc(ctrl, nvtxs); - todo = idxwspacemalloc(ctrl, nvtxs); - maxpwgt = idxwspacemalloc(ctrl, nparts); - cpvec = idxwspacemalloc(ctrl, nparts); - npcmps = idxset(nparts, 0, idxwspacemalloc(ctrl, nparts)); - - for (i=0; i 0) { - if (first == last) { /* Find another starting vertex */ - cptr[++ncmps] = first; - ASSERT(touched[todo[0]] == 0); - i = todo[0]; - cind[last++] = i; - touched[i] = 1; - me = where[i]; - npcmps[me]++; - } - - i = cind[first++]; - k = perm[i]; - j = todo[k] = todo[--nleft]; - perm[j] = k; - - for (j=xadj[i]; j nparts) { /* There are more components than processors */ - /* First determine the max allowed load imbalance */ - tvwgt = idxsum(nparts, pwgts); - for (i=0; i .30*pwgts[me]) - continue; /* Skip the component if it is over 30% of the weight */ - - /* Determine the connectivity */ - idxset(nparts, 0, cpvec); - for (j=cptr[i]; j 0 && (cwgt < deltawgt || pwgts[j] + cwgt < maxpwgt[j])) { - if (target == -1 || cpvec[target] < cpvec[j]) - target = j; - } - } - - /* printf("\tMoving it to %d [%d]\n", target, cpvec[target]);*/ - - if (target != -1) { - /* Assign all the vertices of 'me' to 'target' and update data structures */ - INC_DEC(pwgts[target], pwgts[me], cwgt); - npcmps[me]--; - - MoveGroup(ctrl, graph, nparts, target, i, cptr, cind); - } - } - - } - - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nparts); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs); - idxwspacefree(ctrl, nvtxs+1); - -} - - -/************************************************************************* -* This function moves a collection of vertices and updates their rinfo -**************************************************************************/ -void MoveGroup(CtrlType *ctrl, GraphType *graph, int nparts, int to, int gid, idxtype *ptr, idxtype *ind) -{ - int i, ii, iii, j, 
jj, k, l, nvtxs, nbnd, myndegrees; - int from, me; - idxtype *xadj, *adjncy, *adjwgt; - idxtype *where, *bndptr, *bndind; - EDegreeType *myedegrees; - RInfoType *myrinfo; - - nvtxs = graph->nvtxs; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - where = graph->where; - bndptr = graph->bndptr; - bndind = graph->bndind; - - nbnd = graph->nbnd; - - for (iii=ptr[gid]; iiirinfo+i; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[i+1]-xadj[i]; - myrinfo->ndegrees = 0; - } - myedegrees = myrinfo->edegrees; - - /* find the location of 'to' in myrinfo or create it if it is not there */ - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) - break; - } - if (k == myrinfo->ndegrees) { - myedegrees[k].pid = to; - myedegrees[k].ed = 0; - myrinfo->ndegrees++; - } - - graph->mincut -= myedegrees[k].ed-myrinfo->id; - - - /* Update where, weight, and ID/ED information of the vertex you moved */ - where[i] = to; - myrinfo->ed += myrinfo->id-myedegrees[k].ed; - SWAP(myrinfo->id, myedegrees[k].ed, j); - if (myedegrees[k].ed == 0) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].pid = from; - - if (myrinfo->ed-myrinfo->id < 0 && bndptr[i] != -1) - BNDDelete(nbnd, bndind, bndptr, i); - - /* Update the degrees of adjacent vertices */ - for (j=xadj[i]; jrinfo+ii; - if (myrinfo->edegrees == NULL) { - myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; - ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; - } - myedegrees = myrinfo->edegrees; - - ASSERT(CheckRInfo(myrinfo)); - - if (me == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - - if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1) - BNDInsert(nbnd, bndind, bndptr, ii); - } - else if (me == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - - if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1) - BNDDelete(nbnd, bndind, bndptr, ii); - } - - /* Remove contribution from 
the .ed of 'from' */ - if (me != from) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == from) { - if (myedegrees[k].ed == adjwgt[j]) - myedegrees[k] = myedegrees[--myrinfo->ndegrees]; - else - myedegrees[k].ed -= adjwgt[j]; - break; - } - } - } - - /* Add contribution to the .ed of 'to' */ - if (me != to) { - for (k=0; kndegrees; k++) { - if (myedegrees[k].pid == to) { - myedegrees[k].ed += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - myedegrees[myrinfo->ndegrees].pid = to; - myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; - } - } - - ASSERT(CheckRInfo(myrinfo)); - } - - ASSERT(CheckRInfo(graph->rinfo+i)); - } - - graph->nbnd = nbnd; - -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/temp/metis.h b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/temp/metis.h index a92cfed5..4d421193 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/temp/metis.h +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/temp/metis.h @@ -1,33 +1,359 @@ -/* - * Copyright 1997, Regents of the University of Minnesota +/*! +\file metis.h +\brief This file contains function prototypes and constant definitions for METIS * - * metis.h - * - * This file includes all necessary header files - * - * Started 8/27/94 - * George - * - * $Id: metis.h,v 1.3 2003/07/25 13:52:00 karypis Exp $ - */ - -/* -#define DEBUG 1 -#define DMALLOC 1 +\author George +\date Started 8/9/02 +\version\verbatim $Id$\endverbatim */ -#include +#ifndef _METIS_H_ +#define _METIS_H_ + +/**************************************************************************** +* A set of defines that can be modified by the user +*****************************************************************************/ + +/*-------------------------------------------------------------------------- + Specifies the width of the elementary data type that will hold information + about vertices and their adjacency lists. 
+ + Possible values: + 32 : Use 32 bit signed integers + 64 : Use 64 bit signed integers + + A width of 64 should be specified if the number of vertices or the total + number of edges in the graph exceed the limits of a 32 bit signed integer + i.e., 2^31-1. + Proper use of 64 bit integers requires that the c99 standard datatypes + int32_t and int64_t are supported by the compiler. + GCC does provides these definitions in stdint.h, but it may require some + modifications on other architectures. +--------------------------------------------------------------------------*/ +#define IDXTYPEWIDTH 32 + +/*-------------------------------------------------------------------------- + Specifies the data type that will hold floating-point style information. + + Possible values: + 32 : single precision floating point (float) + 64 : double precision floating point (double) +--------------------------------------------------------------------------*/ +//#define REALTYPEWIDTH 32 +#define REALTYPEWIDTH 64 + + +/**************************************************************************** +* In principle, nothing needs to be changed beyond this point, unless the +* int32_t and int64_t cannot be found in the normal places. +*****************************************************************************/ -#ifdef DMALLOC -#include +/* Uniform definitions for various compilers */ +#if defined(_MSC_VER) + #define COMPILER_MSC #endif +#if defined(__ICC) + #define COMPILER_ICC +#endif +#if defined(__GNUC__) + #define COMPILER_GCC +#endif + +/* Include c99 int definitions and need constants. 
When building the library, + * these are already defined by GKlib hence the test for _GKLIB_H_ */ +#ifndef _GKLIB_H_ +#ifdef COMPILER_MSC +#include + +typedef __int32 int32_t +typedef __int64 int64_t +#define PRId32 "I32d" +#define PRId64 "I64d" +#define SCNd32 "ld" +#define SCNd64 "I64d" +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#else +#include +#endif +#endif + + +/*------------------------------------------------------------------------ +* Setup the basic datatypes +*-------------------------------------------------------------------------*/ +#if IDXTYPEWIDTH == 32 + typedef int32_t idx_t + + #define IDX_MAX INT32_MAX + #define IDX_MIN INT32_MIN + + #define SCIDX SCNd32 + #define PRIDX PRId32 + + #define strtoidx strtol + #define iabs abs +#elif IDXTYPEWIDTH == 64 + typedef int64_t idx_t + + #define IDX_MAX INT64_MAX + #define IDX_MIN INT64_MIN + + #define SCIDX SCNd64 + #define PRIDX PRId64 + +#ifdef COMPILER_MSC + #define strtoidx _strtoi64 +#else + #define strtoidx strtoll +#endif + #define iabs labs +#else + #error "Incorrect user-supplied value fo IDXTYPEWIDTH" +#endif + + +#if REALTYPEWIDTH == 32 + typedef float real_t + + #define SCREAL "f" + #define PRREAL "f" + #define REAL_MAX FLT_MAX + #define REAL_MIN FLT_MIN + #define REAL_EPSILON FLT_EPSILON + + #define rabs fabsf + #define REALEQ(x,y) ((rabs((x)-(y)) <= FLT_EPSILON)) + +#ifdef COMPILER_MSC + #define strtoreal (float)strtod +#else + #define strtoreal strtof +#endif +#elif REALTYPEWIDTH == 64 + typedef double real_t + + #define SCREAL "lf" + #define PRREAL "lf" + #define REAL_MAX DBL_MAX + #define REAL_MIN DBL_MIN + #define REAL_EPSILON DBL_EPSILON + + #define rabs fabs + #define REALEQ(x,y) ((rabs((x)-(y)) <= DBL_EPSILON)) + + #define strtoreal strtod +#else + #error "Incorrect user-supplied value for REALTYPEWIDTH" +#endif + + +/*------------------------------------------------------------------------ +* 
Constant definitions +*-------------------------------------------------------------------------*/ +/* Metis's version number */ +#define METIS_VER_MAJOR 5 +#define METIS_VER_MINOR 2 +#define METIS_VER_SUBMINOR 1 + +/* The maximum length of the options[] array */ +#define METIS_NOPTIONS 40 + + + +/*------------------------------------------------------------------------ +* Function prototypes +*-------------------------------------------------------------------------*/ + +#ifdef _WINDLL +#define METIS_API(type) __declspec(dllexport) type __cdecl +#elif defined(__cdecl) +#define METIS_API(type) type __cdecl +#else +#define METIS_API(type) type +#endif + + + +#ifdef __cplusplus +extern "C" { +#endif + +METIS_API(int) METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, + idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part) + +METIS_API(int) METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, + idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part) + +METIS_API(int) METIS_MeshToDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *ncommon, idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy) + +METIS_API(int) METIS_MeshToNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy) + +METIS_API(int) METIS_PartMeshNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *vwgt, idx_t *vsize, idx_t *nparts, real_t *tpwgts, + idx_t *options, idx_t *objval, idx_t *epart, idx_t *npart) + +METIS_API(int) METIS_PartMeshDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *vwgt, idx_t *vsize, idx_t *ncommon, idx_t *nparts, + real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, + idx_t *npart) + +METIS_API(int) METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *options, 
idx_t *perm, idx_t *iperm) + +METIS_API(int) METIS_Free(void *ptr) + +METIS_API(int) METIS_SetDefaultOptions(idx_t *options) + + +/* These functions are used by ParMETIS */ + +METIS_API(int) METIS_NodeNDP(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t npes, idx_t *options, idx_t *perm, idx_t *iperm, + idx_t *sizes) + +METIS_API(int) METIS_ComputeVertexSeparator(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *options, idx_t *sepsize, idx_t *part) + +METIS_API(int) METIS_NodeRefine(idx_t nvtxs, idx_t *xadj, idx_t *vwgt, idx_t *adjncy, + idx_t *where, idx_t *hmarker, real_t ubfactor) + + +/* These functions are used by DGL */ + +METIS_API(int) METIS_CacheFriendlyReordering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *part, idx_t *old2new) + +#ifdef __cplusplus +} +#endif + + + +/*------------------------------------------------------------------------ +* Enum type definitions +*-------------------------------------------------------------------------*/ +/*! Return codes */ +typedef enum { + METIS_OK = 1, /*!< Returned normally */ + METIS_ERROR_INPUT = -2, /*!< Returned due to erroneous inputs and/or options */ + METIS_ERROR_MEMORY = -3, /*!< Returned due to insufficient memory */ + METIS_ERROR = -4 /*!< Some other errors */ +} rstatus_et + + +/*! Operation type codes */ +typedef enum { + METIS_OP_PMETIS, + METIS_OP_KMETIS, + METIS_OP_OMETIS +} moptype_et + + +/*! 
Options codes (i.e., options[]) */ +typedef enum { + METIS_OPTION_PTYPE, + METIS_OPTION_OBJTYPE, + METIS_OPTION_CTYPE, + METIS_OPTION_IPTYPE, + METIS_OPTION_RTYPE, + METIS_OPTION_DBGLVL, + METIS_OPTION_NIPARTS, + METIS_OPTION_NITER, + METIS_OPTION_NCUTS, + METIS_OPTION_SEED, + METIS_OPTION_ONDISK, + METIS_OPTION_MINCONN, + METIS_OPTION_CONTIG, + METIS_OPTION_COMPRESS, + METIS_OPTION_CCORDER, + METIS_OPTION_PFACTOR, + METIS_OPTION_NSEPS, + METIS_OPTION_UFACTOR, + METIS_OPTION_NUMBERING, + METIS_OPTION_DROPEDGES, + METIS_OPTION_NO2HOP, + METIS_OPTION_TWOHOP, + METIS_OPTION_FAST, + + /* Used for command-line parameter purposes */ + METIS_OPTION_HELP, + METIS_OPTION_TPWGTS, + METIS_OPTION_NCOMMON, + METIS_OPTION_NOOUTPUT, + METIS_OPTION_BALANCE, + METIS_OPTION_GTYPE, + METIS_OPTION_UBVEC +} moptions_et + + +/*! Partitioning Schemes */ +typedef enum { + METIS_PTYPE_RB, + METIS_PTYPE_KWAY +} mptype_et + +/*! Graph types for meshes */ +typedef enum { + METIS_GTYPE_DUAL, + METIS_GTYPE_NODAL +} mgtype_et + +/*! Coarsening Schemes */ +typedef enum { + METIS_CTYPE_RM, + METIS_CTYPE_SHEM +} mctype_et + +/*! Initial partitioning schemes */ +typedef enum { + METIS_IPTYPE_GROW, + METIS_IPTYPE_RANDOM, + METIS_IPTYPE_EDGE, + METIS_IPTYPE_NODE, + METIS_IPTYPE_METISRB +} miptype_et + + +/*! Refinement schemes */ +typedef enum { + METIS_RTYPE_FM, + METIS_RTYPE_GREEDY, + METIS_RTYPE_SEP2SIDED, + METIS_RTYPE_SEP1SIDED +} mrtype_et + + +/*! 
Debug Levels */ +typedef enum { + METIS_DBG_INFO = 1, /*!< Shows various diagnostic messages */ + METIS_DBG_TIME = 2, /*!< Perform timing analysis */ + METIS_DBG_COARSEN = 4, /*!< Show the coarsening progress */ + METIS_DBG_REFINE = 8, /*!< Show the refinement progress */ + METIS_DBG_IPART = 16, /*!< Show info on initial partitioning */ + METIS_DBG_MOVEINFO = 32, /*!< Show info on vertex moves during refinement */ + METIS_DBG_SEPINFO = 64, /*!< Show info on vertex moves during sep refinement */ + METIS_DBG_CONNINFO = 128, /*!< Show info on minimization of subdomain connectivity */ + METIS_DBG_CONTIGINFO = 256, /*!< Show info on elimination of connected components */ + METIS_DBG_MEMORY = 2048 /*!< Show info related to wspace allocation */ +} mdbglvl_et + + +/* Types of objectives */ +typedef enum { + METIS_OBJTYPE_CUT, + METIS_OBJTYPE_VOL, + METIS_OBJTYPE_NODE +} mobjtype_et + -// moved this to metis_svfsi directory -// #include "../metis_svfsi_parmetis.h" /* Get the idxtype definition */ -#include /* Get the idxtype definition */ -#include -#include -#include -#include -#include +#endif /* _METIS_H_ */ diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/temp/metis_defs.h b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/temp/metis_defs.h index 8df42c75..c18350f5 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/temp/metis_defs.h +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/temp/metis_defs.h @@ -8,154 +8,53 @@ * Started 8/27/94 * George * - * $Id: defs.h,v 1.1 2003/07/16 15:55:01 karypis Exp $ + * $Id: defs.h 20398 2016-11-22 17:17:12Z karypis $ * */ -#define METISTITLE " METIS 4.0 Copyright 1998, Regents of the University of Minnesota\n\n" -#define MAXLINE 1280000 - -#define LTERM (void **) 0 /* List terminator for GKfree() */ - -#define MAXNCON 16 /* The maximum number of constrains */ -#define MAXNOBJ 16 /* The maximum number of objectives */ - -#define PLUS_GAINSPAN 500 /* 
Parameters for FM buckets */ -#define NEG_GAINSPAN 500 - -#define HTLENGTH ((1<<11)-1) - -/* Meaning of various options[] parameters */ -#define OPTION_PTYPE 0 -#define OPTION_CTYPE 1 -#define OPTION_ITYPE 2 -#define OPTION_RTYPE 3 -#define OPTION_DBGLVL 4 -#define OPTION_OFLAGS 5 -#define OPTION_PFACTOR 6 -#define OPTION_NSEPS 7 - -#define OFLAG_COMPRESS 1 /* Try to compress the graph */ -#define OFLAG_CCMP 2 /* Find and order connected components */ - - -/* Default options for PMETIS */ -#define PMETIS_CTYPE MATCH_SHEM -#define PMETIS_ITYPE IPART_GGPKL -#define PMETIS_RTYPE RTYPE_FM -#define PMETIS_DBGLVL 0 - -/* Default options for KMETIS */ -#define KMETIS_CTYPE MATCH_SHEM -#define KMETIS_ITYPE IPART_PMETIS -#define KMETIS_RTYPE RTYPE_KWAYRANDOM_MCONN -#define KMETIS_DBGLVL 0 - -/* Default options for OEMETIS */ -#define OEMETIS_CTYPE MATCH_SHEM -#define OEMETIS_ITYPE IPART_GGPKL -#define OEMETIS_RTYPE RTYPE_FM -#define OEMETIS_DBGLVL 0 +#ifndef _LIBMETIS_DEFS_H_ +#define _LIBMETIS_DEFS_H_ -/* Default options for ONMETIS */ -#define ONMETIS_CTYPE MATCH_SHEM -#define ONMETIS_ITYPE IPART_GGPKL -#define ONMETIS_RTYPE RTYPE_SEP1SIDED -#define ONMETIS_DBGLVL 0 -#define ONMETIS_OFLAGS OFLAG_COMPRESS -#define ONMETIS_PFACTOR -1 -#define ONMETIS_NSEPS 1 - -/* Default options for McPMETIS */ -#define McPMETIS_CTYPE MATCH_SHEBM_ONENORM -#define McPMETIS_ITYPE IPART_RANDOM -#define McPMETIS_RTYPE RTYPE_FM -#define McPMETIS_DBGLVL 0 - -/* Default options for McKMETIS */ -#define McKMETIS_CTYPE MATCH_SHEBM_ONENORM -#define McKMETIS_ITYPE IPART_McHPMETIS -#define McKMETIS_RTYPE RTYPE_KWAYRANDOM -#define McKMETIS_DBGLVL 0 - -/* Default options for KVMETIS */ -#define KVMETIS_CTYPE MATCH_SHEM -#define KVMETIS_ITYPE IPART_PMETIS -#define KVMETIS_RTYPE RTYPE_KWAYRANDOM -#define KVMETIS_DBGLVL 0 - - -/* Operations supported by stand-alone code */ -#define OP_PMETIS 1 -#define OP_KMETIS 2 -#define OP_OEMETIS 3 -#define OP_ONMETIS 4 -#define OP_ONWMETIS 5 -#define OP_KVMETIS 6 - - 
-/* Matching Schemes */ -#define MATCH_RM 1 -#define MATCH_HEM 2 -#define MATCH_SHEM 3 -#define MATCH_SHEMKWAY 4 -#define MATCH_SHEBM_ONENORM 5 -#define MATCH_SHEBM_INFNORM 6 -#define MATCH_SBHEM_ONENORM 7 -#define MATCH_SBHEM_INFNORM 8 +#define METISTITLE "METIS 5.2.1 Copyright 1998-22, Regents of the University of Minnesota\n" +#define MAXLINE 1280000 -/* Initial partitioning schemes for PMETIS and ONMETIS */ -#define IPART_GGPKL 1 -#define IPART_GGPKLNODE 2 -#define IPART_RANDOM 2 +#define LTERM (void **) 0 /* List terminator for gk_free() */ -/* Refinement schemes for PMETIS */ -#define RTYPE_FM 1 +#define HTLENGTH ((1<<13)-1) -/* Initial partitioning schemes for KMETIS */ -#define IPART_PMETIS 1 +#define INIT_MAXNAD 200 /* Initial number of maximum number of + adjacent domains. This number will be + adjusted as required. */ -/* Refinement schemes for KMETIS */ -#define RTYPE_KWAYRANDOM 1 -#define RTYPE_KWAYGREEDY 2 -#define RTYPE_KWAYRANDOM_MCONN 3 +/* Types of boundaries */ +#define BNDTYPE_REFINE 1 /* Used for k-way refinement-purposes */ +#define BNDTYPE_BALANCE 2 /* Used for k-way balancing purposes */ -/* Refinement schemes for ONMETIS */ -#define RTYPE_SEP2SIDED 1 -#define RTYPE_SEP1SIDED 2 +/* Mode of optimization */ +#define OMODE_REFINE 1 /* Optimize the objective function */ +#define OMODE_BALANCE 2 /* Balance the subdomains */ -/* Initial Partitioning Schemes for McKMETIS */ -#define IPART_McPMETIS 1 /* Simple McPMETIS */ -#define IPART_McHPMETIS 2 /* horizontally relaxed McPMETIS */ +/* Types of vertex statues in the priority queue */ +#define VPQSTATUS_PRESENT 1 /* The vertex is in the queue */ +#define VPQSTATUS_EXTRACTED 2 /* The vertex has been extracted from the queue */ +#define VPQSTATUS_NOTPRESENT 3 /* The vertex is not present in the queue and + has not been extracted before */ #define UNMATCHED -1 -#define HTABLE_EMPTY -1 - -#define NGR_PASSES 4 /* Number of greedy refinement passes */ -#define NLGR_PASSES 5 /* Number of GR refinement 
during IPartition */ +#define LARGENIPARTS 7 /* Number of random initial partitions */ +#define SMALLNIPARTS 5 /* Number of random initial partitions */ -#define LARGENIPARTS 8 /* Number of random initial partitions */ -#define SMALLNIPARTS 3 /* Number of random initial partitions */ - -#define COARSEN_FRACTION 0.75 /* Node reduction between succesive coarsening levels */ -#define COARSEN_FRACTION2 0.90 /* Node reduction between succesive coarsening levels */ -#define UNBALANCE_FRACTION 1.05 +#define COARSEN_FRACTION 0.85 /* Node reduction between successive coarsening levels */ #define COMPRESSION_FRACTION 0.85 -#define ORDER_UNBALANCE_FRACTION 1.10 - -#define MMDSWITCH 200 +#define MMDSWITCH 120 -#define HORIZONTAL_IMBALANCE 1.05 +/* Default ufactors for the various operational modes */ +#define PMETIS_DEFAULT_UFACTOR 1 +#define MCPMETIS_DEFAULT_UFACTOR 10 +#define KMETIS_DEFAULT_UFACTOR 30 +#define OMETIS_DEFAULT_UFACTOR 200 -/* Debug Levels */ -#define DBG_TIME 1 /* Perform timing analysis */ -#define DBG_OUTPUT 2 -#define DBG_COARSEN 4 /* Show the coarsening progress */ -#define DBG_REFINE 8 /* Show info on communication during folding */ -#define DBG_IPART 16 /* Show info on initial partition */ -#define DBG_MOVEINFO 32 /* Show info on communication during folding */ -#define DBG_KWAYPINFO 64 /* Show info on communication during folding */ -#define DBG_SEPINFO 128 /* Show info on communication during folding */ +#endif diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/timing.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/timing.c index a9d0910b..9d6e05cf 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/timing.c +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/timing.c @@ -8,34 +8,30 @@ * Started 7/24/97 * George * - * $Id: timing.c,v 1.1 2003/07/16 15:55:20 karypis Exp $ + * $Id: timing.c 13936 2013-03-30 03:59:09Z karypis $ * */ -#include +#include "metislib.h" 
/************************************************************************* * This function clears the timers **************************************************************************/ -void InitTimers(CtrlType *ctrl) +void InitTimers(ctrl_t *ctrl) { - cleartimer(ctrl->TotalTmr); - cleartimer(ctrl->InitPartTmr); - cleartimer(ctrl->MatchTmr); - cleartimer(ctrl->ContractTmr); - cleartimer(ctrl->CoarsenTmr); - cleartimer(ctrl->UncoarsenTmr); - cleartimer(ctrl->RefTmr); - cleartimer(ctrl->ProjectTmr); - cleartimer(ctrl->SplitTmr); - cleartimer(ctrl->SepTmr); - cleartimer(ctrl->AuxTmr1); - cleartimer(ctrl->AuxTmr2); - cleartimer(ctrl->AuxTmr3); - cleartimer(ctrl->AuxTmr4); - cleartimer(ctrl->AuxTmr5); - cleartimer(ctrl->AuxTmr6); + gk_clearcputimer(ctrl->TotalTmr); + gk_clearcputimer(ctrl->InitPartTmr); + gk_clearcputimer(ctrl->MatchTmr); + gk_clearcputimer(ctrl->ContractTmr); + gk_clearcputimer(ctrl->CoarsenTmr); + gk_clearcputimer(ctrl->UncoarsenTmr); + gk_clearcputimer(ctrl->RefTmr); + gk_clearcputimer(ctrl->ProjectTmr); + gk_clearcputimer(ctrl->SplitTmr); + gk_clearcputimer(ctrl->Aux1Tmr); + gk_clearcputimer(ctrl->Aux2Tmr); + gk_clearcputimer(ctrl->Aux3Tmr); } @@ -43,32 +39,25 @@ void InitTimers(CtrlType *ctrl) /************************************************************************* * This function prints the various timers **************************************************************************/ -void PrintTimers(CtrlType *ctrl) +void PrintTimers(ctrl_t *ctrl) { printf("\nTiming Information -------------------------------------------------"); - printf("\n Multilevel: \t\t %7.3f", gettimer(ctrl->TotalTmr)); - printf("\n Coarsening: \t\t %7.3f", gettimer(ctrl->CoarsenTmr)); - printf("\n Matching: \t\t\t %7.3f", gettimer(ctrl->MatchTmr)); - printf("\n Contract: \t\t\t %7.3f", gettimer(ctrl->ContractTmr)); - printf("\n Initial Partition: \t %7.3f", gettimer(ctrl->InitPartTmr)); - printf("\n Construct Separator: \t %7.3f", gettimer(ctrl->SepTmr)); - printf("\n 
Uncoarsening: \t\t %7.3f", gettimer(ctrl->UncoarsenTmr)); - printf("\n Refinement: \t\t\t %7.3f", gettimer(ctrl->RefTmr)); - printf("\n Projection: \t\t\t %7.3f", gettimer(ctrl->ProjectTmr)); - printf("\n Splitting: \t\t %7.3f", gettimer(ctrl->SplitTmr)); - printf("\n AUX1: \t\t %7.3f", gettimer(ctrl->AuxTmr1)); - printf("\n AUX2: \t\t %7.3f", gettimer(ctrl->AuxTmr2)); - printf("\n AUX3: \t\t %7.3f", gettimer(ctrl->AuxTmr3)); + printf("\n Multilevel: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->TotalTmr)); + printf("\n Coarsening: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->CoarsenTmr)); + printf("\n Matching: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->MatchTmr)); + printf("\n Contract: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->ContractTmr)); + printf("\n Initial Partition: \t %7.3"PRREAL"", gk_getcputimer(ctrl->InitPartTmr)); + printf("\n Uncoarsening: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->UncoarsenTmr)); + printf("\n Refinement: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->RefTmr)); + printf("\n Projection: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->ProjectTmr)); + printf("\n Splitting: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->SplitTmr)); +/* + printf("\n Aux1Tmr: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->Aux1Tmr)); + printf("\n Aux2Tmr: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->Aux2Tmr)); + printf("\n Aux3Tmr: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->Aux3Tmr)); +*/ printf("\n********************************************************************\n"); } -/************************************************************************* -* This function returns the seconds -**************************************************************************/ -double seconds(void) -{ - return((double) clock()/CLOCKS_PER_SEC); -} - diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/util.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/util.c index a49e9a07..7fbc4672 100644 --- a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/util.c +++ 
b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/util.c @@ -8,215 +8,44 @@ * Started 9/28/95 * George * - * $Id: util.c,v 1.2 2003/07/21 18:53:41 karypis Exp $ + * $Id: util.c 10495 2011-07-06 16:04:45Z karypis $ */ -#include +#include "metislib.h" -/************************************************************************* -* This function prints an error message and exits -**************************************************************************/ -void errexit(char *f_str,...) +/*************************************************************************/ +/*! This function initializes the random number generator + */ +/*************************************************************************/ +void InitRandom(idx_t seed) { - va_list argp; - - fprintf(stderr, "[METIS Fatal Error] "); - - va_start(argp, f_str); - vfprintf(stderr, f_str, argp); - va_end(argp); - - if (strlen(f_str) == 0 || f_str[strlen(f_str)-1] != '\n') - fprintf(stderr,"\n"); - fflush(stderr); - - abort(); -} - - - -#ifndef DMALLOC -/************************************************************************* -* The following function allocates an array of integers -**************************************************************************/ -int *imalloc(int n, char *msg) -{ - if (n == 0) - return NULL; - - return (int *)GKmalloc(sizeof(int)*n, msg); -} - - -/************************************************************************* -* The following function allocates an array of integers -**************************************************************************/ -idxtype *idxmalloc(int n, char *msg) -{ - if (n == 0) - return NULL; - - return (idxtype *)GKmalloc(sizeof(idxtype)*n, msg); -} - - -/************************************************************************* -* The following function allocates an array of float -**************************************************************************/ -float *fmalloc(int n, char *msg) -{ - if (n == 0) - return NULL; - - return (float 
*)GKmalloc(sizeof(float)*n, msg); -} - - -/************************************************************************* -* The follwoing function allocates an array of integers -**************************************************************************/ -int *ismalloc(int n, int ival, char *msg) -{ - if (n == 0) - return NULL; - - return iset(n, ival, (int *)GKmalloc(sizeof(int)*n, msg)); -} - - - -/************************************************************************* -* The follwoing function allocates an array of integers -**************************************************************************/ -idxtype *idxsmalloc(int n, idxtype ival, char *msg) -{ - if (n == 0) - return NULL; - - return idxset(n, ival, (idxtype *)GKmalloc(sizeof(idxtype)*n, msg)); -} - - -/************************************************************************* -* This function is my wrapper around malloc -**************************************************************************/ -void *GKmalloc(int nbytes, char *msg) -{ - void *ptr; - - if (nbytes == 0) - return NULL; - - ptr = (void *)malloc(nbytes); - if (ptr == NULL) - errexit("***Memory allocation failed for %s. Requested size: %d bytes", msg, nbytes); - - return ptr; -} -#endif - -/************************************************************************* -* This function is my wrapper around free, allows multiple pointers -**************************************************************************/ -void GKfree(void **ptr1,...) 
-{ - va_list plist; - void **ptr; - - if (*ptr1 != NULL) - free(*ptr1); - *ptr1 = NULL; - - va_start(plist, ptr1); - - /* while ((int)(ptr = va_arg(plist, void **)) != -1) { */ - while ((ptr = va_arg(plist, void **)) != LTERM) { - if (*ptr != NULL) - free(*ptr); - *ptr = NULL; - } - - va_end(plist); -} - - -/************************************************************************* -* These functions set the values of a vector -**************************************************************************/ -int *iset(int n, int val, int *x) -{ - int i; - - for (i=0; i x[max] ? i : max); - + max = (x[i]*y[i] > x[max]*y[max] ? i : max); + return max; } -/************************************************************************* -* These functions return the index of the maximum element in a vector -**************************************************************************/ -int idxamax(int n, idxtype *x) +/*************************************************************************/ +/*! These functions return the index of the maximum element in a vector + */ +/*************************************************************************/ +idx_t iargmax_strd(size_t n, idx_t *x, idx_t incx) { - int i, max=0; - - for (i=1; i x[max] ? i : max); - - return max; -} - -/************************************************************************* -* These functions return the index of the maximum element in a vector -**************************************************************************/ -int idxamax_strd(int n, idxtype *x, int incx) -{ - int i, max=0; + size_t i, max=0; n *= incx; for (i=incx; i x[max] ? i : max); - - return max; -} - -/************************************************************************* -* These functions return the index of the almost maximum element in a vector -**************************************************************************/ -int samax2(int n, float *x) +/*************************************************************************/ +/*! 
These functions return the index of the almost maximum element in a + vector + */ +/*************************************************************************/ +idx_t rargmax2(size_t n, real_t *x) { - int i, max1, max2; + size_t i, max1, max2; if (x[0] > x[1]) { max1 = 0; @@ -269,243 +86,53 @@ int samax2(int n, float *x) } -/************************************************************************* -* These functions return the index of the minimum element in a vector -**************************************************************************/ -int idxamin(int n, idxtype *x) -{ - int i, min=0; - - for (i=1; i x[1]*y[1]) { + max1 = 0; + max2 = 1; } - - return sum; -} - - -/************************************************************************* -* This function sums the entries in an array -**************************************************************************/ -void idxadd(int n, idxtype *x, idxtype *y) -{ - for (n--; n>=0; n--) - y[n] += x[n]; -} - - -/************************************************************************* -* This function sums the entries in an array -**************************************************************************/ -int charsum(int n, char *x) -{ - int i, sum = 0; - - for (i=0; i x[max1]*y[max1]) { + max2 = max1; + max1 = i; + } + else if (x[i]*y[i] > x[max2]*y[max2]) + max2 = i; } -} - - -/************************************************************************* -* This function returns true if the a is a power of 2 -**************************************************************************/ -int ispow2(int a) -{ - for (; a%2 != 1; a = a>>1); - return (a > 1 ? 
0 : 1); + return max2; } -/************************************************************************* -* This function initializes the random number generator -**************************************************************************/ -void InitRandom(int seed) -{ - if (seed == -1) - srand(4321); - else - srand(seed); +/*************************************************************************/ +/*! converts a signal code into a Metis return code + */ +/*************************************************************************/ +int metis_rcode(int sigrval) +{ + switch (sigrval) { + case 0: + return METIS_OK; + break; + case SIGMEM: + return METIS_ERROR_MEMORY; + break; + default: + return METIS_ERROR; + break; + } } -/************************************************************************* -* This function returns the log2(x) -**************************************************************************/ -int log2Int(int a) -{ - int i; - - for (i=1; a > 1; i++, a = a>>1); - return i-1; -} diff --git a/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/wspace.c b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/wspace.c new file mode 100644 index 00000000..92853c39 --- /dev/null +++ b/Code/ThirdParty/metis_svfsi/simvascular_metis_svfsi/METISLib/wspace.c @@ -0,0 +1,219 @@ +/*! +\file +\brief Functions dealing with memory allocation and workspace management + +\date Started 2/24/96 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version $Id: wspace.c 10492 2011-07-06 09:28:42Z karypis $ +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! 
This function allocates memory for the workspace */ +/*************************************************************************/ +void AllocateWorkSpace(ctrl_t *ctrl, graph_t *graph) +{ + size_t coresize; + + switch (ctrl->optype) { + case METIS_OP_PMETIS: + coresize = 3*(graph->nvtxs+1)*sizeof(idx_t) + + 5*(ctrl->nparts+1)*graph->ncon*sizeof(idx_t) + + 5*(ctrl->nparts+1)*graph->ncon*sizeof(real_t); + break; + default: + coresize = 4*(graph->nvtxs+1)*sizeof(idx_t) + + 5*(ctrl->nparts+1)*graph->ncon*sizeof(idx_t) + + 5*(ctrl->nparts+1)*graph->ncon*sizeof(real_t); + } + ctrl->mcore = gk_mcoreCreate(coresize); + + ctrl->nbrpoolsize = 0; + ctrl->nbrpoolcpos = 0; +} + + +/*************************************************************************/ +/*! This function allocates refinement-specific memory for the workspace */ +/*************************************************************************/ +void AllocateRefinementWorkSpace(ctrl_t *ctrl, idx_t nbrpoolsize_max, idx_t nbrpoolsize) +{ + ctrl->nbrpoolsize_max = nbrpoolsize_max; + ctrl->nbrpoolsize = nbrpoolsize; + ctrl->nbrpoolcpos = 0; + ctrl->nbrpoolreallocs = 0; + + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + ctrl->cnbrpool = (cnbr_t *)gk_malloc(ctrl->nbrpoolsize*sizeof(cnbr_t), + "AllocateRefinementWorkSpace: cnbrpool"); + break; + + case METIS_OBJTYPE_VOL: + ctrl->vnbrpool = (vnbr_t *)gk_malloc(ctrl->nbrpoolsize*sizeof(vnbr_t), + "AllocateRefinementWorkSpace: vnbrpool"); + break; + + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); + } + + + /* Allocate the memory for the sparse subdomain graph */ + if (ctrl->minconn) { + ctrl->pvec1 = imalloc(ctrl->nparts+1, "AllocateRefinementWorkSpace: pvec1"); + ctrl->pvec2 = imalloc(ctrl->nparts+1, "AllocateRefinementWorkSpace: pvec2"); + ctrl->maxnads = ismalloc(ctrl->nparts, INIT_MAXNAD, "AllocateRefinementWorkSpace: maxnads"); + ctrl->nads = imalloc(ctrl->nparts, "AllocateRefinementWorkSpace: nads"); + ctrl->adids = 
iAllocMatrix(ctrl->nparts, INIT_MAXNAD, 0, "AllocateRefinementWorkSpace: adids"); + ctrl->adwgts = iAllocMatrix(ctrl->nparts, INIT_MAXNAD, 0, "AllocateRefinementWorkSpace: adwgts"); + } +} + + +/*************************************************************************/ +/*! This function frees the workspace */ +/*************************************************************************/ +void FreeWorkSpace(ctrl_t *ctrl) +{ + gk_mcoreDestroy(&ctrl->mcore, ctrl->dbglvl&METIS_DBG_INFO); + + IFSET(ctrl->dbglvl, METIS_DBG_INFO, + printf(" nbrpool statistics\n" + " nbrpoolsize: %12zu nbrpoolcpos: %12zu\n" + " nbrpoolreallocs: %12zu\n\n", + ctrl->nbrpoolsize, ctrl->nbrpoolcpos, + ctrl->nbrpoolreallocs)); + + gk_free((void **)&ctrl->cnbrpool, &ctrl->vnbrpool, LTERM); + ctrl->nbrpoolsize_max = 0; + ctrl->nbrpoolsize = 0; + ctrl->nbrpoolcpos = 0; + + if (ctrl->minconn) { + iFreeMatrix(&(ctrl->adids), ctrl->nparts, INIT_MAXNAD); + iFreeMatrix(&(ctrl->adwgts), ctrl->nparts, INIT_MAXNAD); + + gk_free((void **)&ctrl->pvec1, &ctrl->pvec2, + &ctrl->maxnads, &ctrl->nads, LTERM); + } +} + + +/*************************************************************************/ +/*! This function allocate space from the workspace/heap */ +/*************************************************************************/ +void *wspacemalloc(ctrl_t *ctrl, size_t nbytes) +{ + return gk_mcoreMalloc(ctrl->mcore, nbytes); +} + + +/*************************************************************************/ +/*! This function sets a marker in the stack of malloc ops to be used + subsequently for freeing purposes */ +/*************************************************************************/ +void wspacepush(ctrl_t *ctrl) +{ + gk_mcorePush(ctrl->mcore); +} + + +/*************************************************************************/ +/*! 
This function frees all mops since the last push */ +/*************************************************************************/ +void wspacepop(ctrl_t *ctrl) +{ + gk_mcorePop(ctrl->mcore); +} + + +/*************************************************************************/ +/*! This function allocate space from the core */ +/*************************************************************************/ +idx_t *iwspacemalloc(ctrl_t *ctrl, idx_t n) +{ + return (idx_t *)wspacemalloc(ctrl, n*sizeof(idx_t)); +} + + +/*************************************************************************/ +/*! This function allocate space from the core */ +/*************************************************************************/ +real_t *rwspacemalloc(ctrl_t *ctrl, idx_t n) +{ + return (real_t *)wspacemalloc(ctrl, n*sizeof(real_t)); +} + + +/*************************************************************************/ +/*! This function allocate space from the core */ +/*************************************************************************/ +ikv_t *ikvwspacemalloc(ctrl_t *ctrl, idx_t n) +{ + return (ikv_t *)wspacemalloc(ctrl, n*sizeof(ikv_t)); +} + + +/*************************************************************************/ +/*! This function resets the cnbrpool */ +/*************************************************************************/ +void cnbrpoolReset(ctrl_t *ctrl) +{ + ctrl->nbrpoolcpos = 0; +} + + +/*************************************************************************/ +/*! 
This function gets the next free index from cnbrpool */ +/*************************************************************************/ +idx_t cnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs) +{ + nnbrs = gk_min(ctrl->nparts, nnbrs); + ctrl->nbrpoolcpos += nnbrs; + + if (ctrl->nbrpoolcpos > ctrl->nbrpoolsize) { + ctrl->nbrpoolsize += gk_max(10*nnbrs, ctrl->nbrpoolsize/2); + ctrl->nbrpoolsize = gk_min(ctrl->nbrpoolsize, ctrl->nbrpoolsize_max); + + ctrl->cnbrpool = (cnbr_t *)gk_realloc(ctrl->cnbrpool, + ctrl->nbrpoolsize*sizeof(cnbr_t), "cnbrpoolGet: cnbrpool"); + ctrl->nbrpoolreallocs++; + } + + return ctrl->nbrpoolcpos - nnbrs; +} + + +/*************************************************************************/ +/*! This function resets the vnbrpool */ +/*************************************************************************/ +void vnbrpoolReset(ctrl_t *ctrl) +{ + ctrl->nbrpoolcpos = 0; +} + + +/*************************************************************************/ +/*! This function gets the next free index from vnbrpool */ +/*************************************************************************/ +idx_t vnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs) +{ + nnbrs = gk_min(ctrl->nparts, nnbrs); + ctrl->nbrpoolcpos += nnbrs; + + if (ctrl->nbrpoolcpos > ctrl->nbrpoolsize) { + ctrl->nbrpoolsize += gk_max(10*nnbrs, ctrl->nbrpoolsize/2); + ctrl->nbrpoolsize = gk_min(ctrl->nbrpoolsize, ctrl->nbrpoolsize_max); + + ctrl->vnbrpool = (vnbr_t *)gk_realloc(ctrl->vnbrpool, + ctrl->nbrpoolsize*sizeof(vnbr_t), "vnbrpoolGet: vnbrpool"); + ctrl->nbrpoolreallocs++; + } + + return ctrl->nbrpoolcpos - nnbrs; +} + diff --git a/Code/ThirdParty/parmetis_svfsi/README.md b/Code/ThirdParty/parmetis_svfsi/README.md new file mode 100644 index 00000000..1f62cc0e --- /dev/null +++ b/Code/ThirdParty/parmetis_svfsi/README.md @@ -0,0 +1,6 @@ + +ParMETIS is an MPI-based library for partitioning graphs, partitioning finite element meshes, and producing fill +reducing orderings for sparse matrices. 
+ +The source was downloaded from https://github.com/KarypisLab/METIS in October 2023. + diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/CMakeLists.txt b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/CMakeLists.txt index f895cc61..4dc5c341 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/CMakeLists.txt +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/CMakeLists.txt @@ -1,19 +1,17 @@ -include_directories(./) -include_directories(${MPI_C_INCLUDE_PATH}) - -set(CSRCS comm.c util.c debug.c setup.c grsetup.c timer.c - node_refine.c initmsection.c - xyzpart.c pspases.c frename.c - iintsort.c iidxsort.c ikeysort.c ikeyvalsort.c - kmetis.c gkmetis.c ometis.c - initpart.c match.c - kwayfm.c kwayrefine.c kwaybalance.c - remap.c stat.c fpqueue.c - ametis.c rmetis.c initbalance.c - mdiffusion.c diffutil.c wave.c - csrmatch.c redomylink.c balancemylink.c - selectq.c akwayfm.c serial.c move.c - mmetis.c mesh.c memory.c weird.c backcompat.c) + +#include_directories(./) +#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../gklib_svfsi/simvascular_gklib_svfsi) +#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../metis_svfsi/simvascular_metis_svfsi/METISLib) +#include_directories(${MPI_C_INCLUDE_PATH}) + +set(CSRCS akwayfm.c ametis.c balancemylink.c comm.c + csrmatch.c ctrl.c debug.c diffutil.c frename.c gklib.c + gkmetis.c gkmpi.c graph.c initbalance.c initmsection.c initpart.c + kmetis.c kwayrefine.c match.c mdiffusion.c mesh.c mmetis.c + move.c msetup.c node_refine.c ometis.c pspases.c redomylink.c + remap.c renumber.c rmetis.c selectq.c serial.c stat.c + timer.c util.c wave.c weird.c wspace.c xyzpart.c +) if(SV_USE_THIRDPARTY_SHARED_LIBRARIES) add_library(${PARMETIS_SVFSI_LIBRARY_NAME} SHARED ${CSRCS}) @@ -21,7 +19,14 @@ else() add_library(${PARMETIS_SVFSI_LIBRARY_NAME} STATIC ${CSRCS}) endif() -target_link_libraries(${PARMETIS_SVFSI_LIBRARY_NAME} 
${METIS_SVFSI_LIBRARY_NAME}) +set(third_party_dir ${CMAKE_CURRENT_SOURCE_DIR}/../../..) +target_include_directories(${PARMETIS_SVFSI_LIBRARY_NAME} PUBLIC ./) +target_include_directories(${PARMETIS_SVFSI_LIBRARY_NAME} PUBLIC ${third_party_dir}/gklib_svfsi/simvascular_gklib_svfsi) +target_include_directories(${PARMETIS_SVFSI_LIBRARY_NAME} PUBLIC ${third_party_dir}/metis_svfsi/simvascular_metis_svfsi/METISLib) +target_include_directories(${PARMETIS_SVFSI_LIBRARY_NAME} PUBLIC ${MPI_C_INCLUDE_PATH}) + + +target_link_libraries(${PARMETIS_SVFSI_LIBRARY_NAME} ${METIS_SVFSI_LIBRARY_NAME} ${GKLIB_SVFSI_LIBRARY_NAME}) if(SV_INSTALL_LIBS) install(TARGETS ${PARMETIS_SVFSI_LIBRARY_NAME} diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/adrivers.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/adrivers.c deleted file mode 100644 index ed209178..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/adrivers.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * adrivers.c - * - * This file contains the driving routines for the various parallel - * multilevel partitioning and repartitioning algorithms - * - * Started 11/19/96 - * George - * - * $Id: adrivers.c,v 1.5 2003/07/30 18:37:58 karypis Exp $ - * - */ - -#include - - - -/************************************************************************* -* This function is the driver for the adaptive refinement mode of ParMETIS -**************************************************************************/ -void Adaptive_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) -{ - int i; - int tewgt, tvsize; - float gtewgt, gtvsize; - float ubavg, lbavg, lbvec[MAXNCON]; - - /************************************/ - /* Set up important data structures */ - /************************************/ - SetUp(ctrl, graph, wspace); - - ubavg = savg(graph->ncon, ctrl->ubvec); - tewgt = 
idxsum(graph->nedges, graph->adjwgt); - tvsize = idxsum(graph->nvtxs, graph->vsize); - gtewgt = (float) GlobalSESum(ctrl, tewgt) + 1.0; /* The +1 were added to remove any FPE */ - gtvsize = (float) GlobalSESum(ctrl, tvsize) + 1.0; - ctrl->redist_factor = ctrl->redist_base * ((gtewgt/gtvsize)/ ctrl->edge_size_ratio); - - IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6d %8d %5d %5d][%d]\n", - graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo)); - - if (graph->gnvtxs < 1.3*ctrl->CoarsenTo || - (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) { - - /***********************************************/ - /* Balance the partition on the coarsest graph */ - /***********************************************/ - graph->where = idxsmalloc(graph->nvtxs+graph->nrecv, -1, "graph->where"); - idxcopy(graph->nvtxs, graph->home, graph->where); - - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - lbavg = savg(graph->ncon, lbvec); - - if (lbavg > ubavg + 0.035 && ctrl->partType != REFINE_PARTITION) - Balance_Partition(ctrl, graph, wspace); - - if (ctrl->dbglvl&DBG_PROGRESS) { - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - rprintf(ctrl, "nvtxs: %10d, balance: ", graph->gnvtxs); - for (i=0; incon; i++) - rprintf(ctrl, "%.3f ", lbvec[i]); - rprintf(ctrl, "\n"); - } - - /* check if no coarsening took place */ - if (graph->finer == NULL) { - Mc_ComputePartitionParams(ctrl, graph, wspace); - Mc_KWayBalance(ctrl, graph, wspace, graph->ncon); - Mc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES); - } - } - else { - /*******************************/ - /* Coarsen it and partition it */ - /*******************************/ - switch (ctrl->ps_relation) { - case PARMETIS_PSR_COUPLED: - Match_Local(ctrl, graph, wspace); - break; - case PARMETIS_PSR_UNCOUPLED: - default: - Match_Global(ctrl, graph, wspace); - break; - } - - Adaptive_Partition(ctrl, 
graph->coarser, wspace); - - /********************************/ - /* project partition and refine */ - /********************************/ - Mc_ProjectPartition(ctrl, graph, wspace); - Mc_ComputePartitionParams(ctrl, graph, wspace); - - if (graph->ncon > 1 && graph->level < 4) { - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - lbavg = savg(graph->ncon, lbvec); - - if (lbavg > ubavg + 0.025) { - Mc_KWayBalance(ctrl, graph, wspace, graph->ncon); - } - } - - Mc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES); - - if (ctrl->dbglvl&DBG_PROGRESS) { - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut); - for (i=0; incon; i++) - rprintf(ctrl, "%.3f ", lbvec[i]); - rprintf(ctrl, "\n"); - } - } -} - diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/akwayfm.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/akwayfm.c index b3716b89..b4fc5faf 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/akwayfm.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/akwayfm.c @@ -8,7 +8,7 @@ * Started 3/1/96 * George * - * $Id: akwayfm.c,v 1.3 2003/07/22 22:58:18 karypis Exp $ + * $Id: akwayfm.c 10528 2011-07-09 19:47:30Z karypis $ */ #include @@ -20,115 +20,114 @@ /************************************************************************* * This function performs k-way refinement **************************************************************************/ -void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses) +void KWayAdaptiveRefine(ctrl_t *ctrl, graph_t *graph, idx_t npasses) { - int h, i, ii, iii, j, k, c; - int pass, nvtxs, nedges, ncon; - int nmoves, nmoved; - int me, firstvtx, lastvtx, yourlastvtx; - int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight; - int npes = ctrl->npes, mype = ctrl->mype, 
nparts = ctrl->nparts; - int nlupd, nsupd, nnbrs, nchanged; - idxtype *xadj, *ladjncy, *adjwgt, *vtxdist; - idxtype *where, *tmp_where, *moved; - float *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill; - idxtype *update, *supdate, *rupdate, *pe_updates; - idxtype *changed, *perm, *pperm, *htable; - idxtype *peind, *recvptr, *sendptr; - KeyValueType *swchanges, *rwchanges; - RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo; - EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees; - float lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; - float oldgain, gain; - float ipc_factor, redist_factor, vsize; - int *nupds_pe, ndirty, nclean, dptr; - int better, worse; + idx_t npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; + idx_t h, i, ii, iii, j, k, c; + idx_t pass, nvtxs, nedges, ncon; + idx_t nmoves, nmoved; + idx_t me, firstvtx, lastvtx, yourlastvtx; + idx_t from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight; + idx_t nlupd, nsupd, nnbrs, nchanged, *nupds_pe; + idx_t *xadj, *ladjncy, *adjwgt, *vtxdist; + idx_t *where, *tmp_where, *moved, *oldEDs; + real_t *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill; + idx_t *update, *supdate, *rupdate, *pe_updates; + idx_t *changed, *perm, *pperm, *htable; + idx_t *peind, *recvptr, *sendptr; + ikv_t *swchanges, *rwchanges; + real_t *lbvec, *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; + real_t oldgain, gain; + real_t ipc_factor, redist_factor, vsize; + idx_t ndirty, nclean, dptr; + idx_t better, worse; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); + WCOREPUSH; /*************************/ /* set up common aliases */ /*************************/ - nvtxs = graph->nvtxs; + nvtxs = graph->nvtxs; nedges = graph->nedges; - ncon = graph->ncon; + ncon = graph->ncon; vtxdist = graph->vtxdist; - xadj = graph->xadj; + xadj = graph->xadj; ladjncy = graph->adjncy; - adjwgt = graph->adjwgt; + adjwgt = 
graph->adjwgt; firstvtx = vtxdist[mype]; - lastvtx = vtxdist[mype+1]; + lastvtx = vtxdist[mype+1]; - where = graph->where; - rinfo = graph->rinfo; + where = graph->where; lnpwgts = graph->lnpwgts; gnpwgts = graph->gnpwgts; - ubvec = ctrl->ubvec; - tpwgts = ctrl->tpwgts; - ipc_factor = ctrl->ipc_factor; + + ubvec = ctrl->ubvec; + tpwgts = ctrl->tpwgts; + ipc_factor = ctrl->ipc_factor; redist_factor = ctrl->redist_factor; - nnbrs = graph->nnbrs; - peind = graph->peind; + nnbrs = graph->nnbrs; + peind = graph->peind; recvptr = graph->recvptr; sendptr = graph->sendptr; - changed = idxmalloc(nvtxs, "AKWR: changed"); - rwchanges = wspace->pairs; - swchanges = rwchanges + recvptr[nnbrs]; - /************************************/ /* set up important data structures */ /************************************/ - perm = idxmalloc(nvtxs, "AKWR: perm"); - pperm = idxmalloc(nparts, "AKWR: pperm"); + lbvec = rwspacemalloc(ctrl, ncon); + + badmaxpwgt = rwspacemalloc(ctrl, nparts*ncon); + movewgts = rwspacemalloc(ctrl, nparts*ncon); + ognpwgts = rwspacemalloc(ctrl, nparts*ncon); + pgnpwgts = rwspacemalloc(ctrl, nparts*ncon); + overfill = rwspacemalloc(ctrl, nparts*ncon); + + pperm = iwspacemalloc(ctrl, nparts); + nupds_pe = iwspacemalloc(ctrl, npes); + + oldEDs = iwspacemalloc(ctrl, nvtxs); + changed = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + update = iwspacemalloc(ctrl, nvtxs); + moved = iwspacemalloc(ctrl, nvtxs); + htable = iset(nvtxs+graph->nrecv, 0, iwspacemalloc(ctrl, nvtxs+graph->nrecv)); + + rwchanges = ikvwspacemalloc(ctrl, graph->nrecv); + swchanges = ikvwspacemalloc(ctrl, graph->nsend); + supdate = iwspacemalloc(ctrl, graph->nrecv); + rupdate = iwspacemalloc(ctrl, graph->nsend); - update = idxmalloc(nvtxs, "AKWR: update"); - supdate = wspace->indices; - rupdate = supdate + recvptr[nnbrs]; - nupds_pe = imalloc(npes, "AKWR: nupds_pe"); - htable = idxsmalloc(nvtxs+graph->nrecv, 0, "AKWR: lhtable"); - badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt"); + 
tmp_where = iwspacemalloc(ctrl, nvtxs+graph->nrecv); for (i=0; inrecv, "AKWR: tmp_where"); - tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "AKWR: tmp_rinfo"); - tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "AKWR: tmp_edegrees"); - - idxcopy(nvtxs+graph->nrecv, where, tmp_where); - for (i=0; inrecv, where, tmp_where); + + /* this will record the overall external degrees of the vertices + prior to a inner refinement iteration in order to allow for + the proper updating of the lmincut */ + for (i=0; ickrinfo[i].ed; + /*********************************************************/ /* perform a small number of passes through the vertices */ /*********************************************************/ for (pass=0; passmincut; if (mype == 0) RandomPermute(nparts, pperm, 1); - MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm); -/* FastRandomPermute(nvtxs, perm, 1); */ + + gkMPI_Bcast((void *)pperm, nparts, IDX_T, 0, ctrl->comm); + oldcut = graph->mincut; + /* FastRandomPermute(nvtxs, perm, 1); */ /*****************************/ /* move dirty vertices first */ @@ -145,21 +144,21 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa else perm[ndirty++] = i; - ASSERT(ctrl, ndirty == nvtxs); + PASSERT(ctrl, ndirty == nvtxs); ndirty = dptr; nclean = nvtxs-dptr; FastRandomPermute(ndirty, perm, 0); FastRandomPermute(nclean, perm+ndirty, 0); /* check to see if the partitioning is imbalanced */ - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - ubavg = savg(ncon, ubvec); - lbavg = savg(ncon, lbvec); + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + ubavg = ravg(ncon, ubvec); + lbavg = ravg(ncon, lbvec); imbalanced = (lbavg > ubavg) ? 
1 : 0; for (c=0; c<2; c++) { - scopy(ncon*nparts, gnpwgts, ognpwgts); - sset(ncon*nparts, 0.0, movewgts); + rcopy(ncon*nparts, gnpwgts, ognpwgts); + rset(ncon*nparts, 0.0, movewgts); nmoved = 0; /**********************************************/ @@ -169,23 +168,25 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa i = perm[iii]; from = tmp_where[i]; nvwgt = graph->nvwgt+i*ncon; - vsize = (float)(graph->vsize[i]); + vsize = (real_t)(graph->vsize[i]); for (h=0; hckrinfo + i; + if (myrinfo->ed <= 0) continue; - my_edegrees = tmp_rinfo[i].degrees; + PASSERT(ctrl, myrinfo->inbr != -1); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; - for (k=0; knnbrs-1; k>=0; k--) { + to = mynbrs[k].pid; if (ProperSide(c, pperm[from], pperm[to])) { for (h=0; h badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) @@ -195,146 +196,145 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa break; } } + + /* break out if you did not find a candidate */ + if (k < 0) + continue; + oldto = to; + /**************************/ + /**************************/ + switch (ctrl->ps_relation) { + case PARMETIS_PSR_COUPLED: + better = (oldto == mype) ? 1 : 0; + worse = (from == mype) ? 1 : 0; + break; + case PARMETIS_PSR_UNCOUPLED: + default: + better = (oldto == graph->home[i]) ? 1 : 0; + worse = (from == graph->home[i]) ? 1 : 0; + break; + } + /**************************/ + /**************************/ - /* check if a subdomain was found that fits */ - if (k < tmp_rinfo[i].ndegrees) { - /**************************/ - /**************************/ - switch (ctrl->ps_relation) { - case PARMETIS_PSR_COUPLED: - better = (oldto == mype) ? 1 : 0; - worse = (from == mype) ? 1 : 0; - break; - case PARMETIS_PSR_UNCOUPLED: - default: - better = (oldto == graph->home[i]) ? 1 : 0; - worse = (from == graph->home[i]) ? 
1 : 0; - break; - } - /**************************/ - /**************************/ - - oldgain = ipc_factor * (float)(my_edegrees[k].ewgt-tmp_rinfo[i].id); - if (better) oldgain += redist_factor * vsize; - if (worse) oldgain -= redist_factor * vsize; - - for (j=k+1; jps_relation) { - case PARMETIS_PSR_COUPLED: - better = (to == mype) ? 1 : 0; - break; - case PARMETIS_PSR_UNCOUPLED: - default: - better = (to == graph->home[i]) ? 1 : 0; - break; - } - /**************************/ - /**************************/ + oldgain = ipc_factor * (real_t)(mynbrs[k].ed-myrinfo->id); + if (better) oldgain += redist_factor * vsize; + if (worse) oldgain -= redist_factor * vsize; - gain = ipc_factor * (float)(my_edegrees[j].ewgt-tmp_rinfo[i].id); - if (better) gain += redist_factor * vsize; - if (worse) gain -= redist_factor * vsize; + for (j=k-1; j>=0; j--) { + to = mynbrs[j].pid; + if (ProperSide(c, pperm[from], pperm[to])) { + /**************************/ + /**************************/ + switch (ctrl->ps_relation) { + case PARMETIS_PSR_COUPLED: + better = (to == mype) ? 1 : 0; + break; + case PARMETIS_PSR_UNCOUPLED: + default: + better = (to == graph->home[i]) ? 
1 : 0; + break; + } + /**************************/ + /**************************/ - for (h=0; h badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) - break; + gain = ipc_factor * (real_t)(mynbrs[j].ed-myrinfo->id); + if (better) gain += redist_factor * vsize; + if (worse) gain -= redist_factor * vsize; - if (h == ncon) { - if (gain > oldgain || - (fabs(gain-oldgain) < SMALLFLOAT && - IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ - oldgain = gain; - oldto = to; - k = j; - } + for (h=0; h badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) + break; + } + + if (h == ncon) { + if (gain > oldgain || + (rabs(gain-oldgain) < SMALLFLOAT && + IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ + oldgain = gain; + oldto = to; + k = j; } } } - to = oldto; - gain = oldgain; - - if (gain > 0.0 || - (gain > -1.0*SMALLFLOAT && - (imbalanced || graph->level > 3 || iii % 8 == 0) && - IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ + } + to = oldto; + gain = oldgain; + + if (gain > 0.0 || + (gain > -1.0*SMALLFLOAT && + (imbalanced || graph->level > 3 || iii % 8 == 0) && + IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))) { + + /****************************************/ + /* Update tmp arrays of the moved vertex */ + /****************************************/ + tmp_where[i] = to; + moved[nmoved++] = i; + for (h=0; hed += myrinfo->id-mynbrs[k].ed; + gk_SWAP(myrinfo->id, mynbrs[k].ed, j); + if (mynbrs[k].ed == 0) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].pid = from; + + /* Update the degrees of adjacent vertices */ + for (j=xadj[i]; j= nvtxs) + continue; + + me = ladjncy[j]; + mydomain = tmp_where[me]; + + myrinfo = graph->ckrinfo+me; + if (myrinfo->inbr == -1) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[me+1]-xadj[me]); + myrinfo->nnbrs = 0; } + mynbrs = ctrl->cnbrpool + myrinfo->inbr; - tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt; - SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, 
j); - if (my_edegrees[k].ewgt == 0) { - tmp_rinfo[i].ndegrees--; - my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge; - my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt; + if (mydomain == from) { + INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { - my_edegrees[k].edge = from; + if (mydomain == to) { + INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); + } } - /* Update the degrees of adjacent vertices */ - for (j=xadj[i]; j= nvtxs) - continue; - - me = ladjncy[j]; - mydomain = tmp_where[me]; - - myrinfo = tmp_rinfo+me; - your_edegrees = myrinfo->degrees; - - if (mydomain == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - } - else { - if (mydomain == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); + /* Remove contribution from the .ed of 'from' */ + if (mydomain != from) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == from) { + if (mynbrs[k].ed == adjwgt[j]) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].ed -= adjwgt[j]; + break; } } + } - /* Remove contribution from the .ed of 'from' */ - if (mydomain != from) { - for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == from) { - if (your_edegrees[k].ewgt == adjwgt[j]) { - myrinfo->ndegrees--; - your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; - your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; - } - else { - your_edegrees[k].ewgt -= adjwgt[j]; - } - break; - } + /* Add contribution to the .ed of 'to' */ + if (mydomain != to) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) { + mynbrs[k].ed += adjwgt[j]; + break; } } - - /* Add contribution to the .ed of 'to' */ - if (mydomain != to) { - for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == to) { - your_edegrees[k].ewgt += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - your_edegrees[myrinfo->ndegrees].edge = to; - your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; - } + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = to; + mynbrs[k].ed = adjwgt[j]; + 
myrinfo->nnbrs++; } } } @@ -345,7 +345,8 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa /* Let processors know the subdomain wgts */ /* if all proposed moves commit. */ /******************************************/ - MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon, MPI_FLOAT, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon, REAL_T, + MPI_SUM, ctrl->comm); /**************************/ /* compute overfill array */ @@ -354,18 +355,21 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa for (j=0; j ognpwgts[j*ncon+h]) - overfill[j*ncon+h] = (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) / (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]); + overfill[j*ncon+h] = (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) / + (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]); else overfill[j*ncon+h] = 0.0; - overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0); + overfill[j*ncon+h] =gk_max(overfill[j*ncon+h], 0.0); overfill[j*ncon+h] *= movewgts[j*ncon+h]; if (overfill[j*ncon+h] > 0.0) overweight = 1; - ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] || pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h], - (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h], badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h])); + PASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] || + pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h], + (ctrl, "%.4"PRREAL" %.4"PRREAL" %.4"PRREAL"\n", + ognpwgts[j*ncon+h], badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h])); } } @@ -374,41 +378,42 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa /****************************************************/ if (overweight == 1) { for (iii=0; iiinvwgt+i*ncon; - my_edegrees = tmp_rinfo[i].degrees; - for (k=0; kckrinfo + i; + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + PASSERT(ctrl, myrinfo->nnbrs == 0 || myrinfo->inbr != -1); + + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == where[i]) break; + } - for (h=0; h 0.0 && 
overfill[oldto*ncon+h] > nvwgt[h]/4.0) break; + } /**********************************/ /* nullify this move if necessary */ /**********************************/ - if (k != tmp_rinfo[i].ndegrees && h != ncon) { + if (k != myrinfo->nnbrs && h != ncon) { moved[iii] = -1; from = oldto; - to = where[i]; + to = where[i]; for (h=0; hed += myrinfo->id-mynbrs[k].ed; + gk_SWAP(myrinfo->id, mynbrs[k].ed, j); + if (mynbrs[k].ed == 0) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].pid = from; for (h=0; hdegrees; + myrinfo = graph->ckrinfo+me; + if (myrinfo->inbr == -1) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[me+1]-xadj[me]); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->cnbrpool + myrinfo->inbr; if (mydomain == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); @@ -436,16 +445,12 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa /* Remove contribution from the .ed of 'from' */ if (mydomain != from) { - for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == from) { - if (your_edegrees[k].ewgt == adjwgt[j]) { - myrinfo->ndegrees--; - your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; - your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; - } - else { - your_edegrees[k].ewgt -= adjwgt[j]; - } + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == from) { + if (mynbrs[k].ed == adjwgt[j]) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].ed -= adjwgt[j]; break; } } @@ -453,15 +458,16 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa /* Add contribution to the .ed of 'to' */ if (mydomain != to) { - for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == to) { - your_edegrees[k].ewgt += adjwgt[j]; + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) { + mynbrs[k].ed += adjwgt[j]; break; } } - if (k == myrinfo->ndegrees) { - your_edegrees[myrinfo->ndegrees].edge = to; - your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; + if (k == myrinfo->nnbrs) { + mynbrs[k].pid 
= to; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; } } } @@ -483,7 +489,7 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa /* Make sure to update the vertex information */ if (htable[i] == 0) { /* make sure you do the update */ - htable[i] = 1; + htable[i] = 1; update[nlupd++] = i; } @@ -505,11 +511,14 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa } /* Tell interested pe's the new where[] info for the interface vertices */ - CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges, wspace->pv4); + CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges); - - IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n", - pass, c, badmaxpwgt[0], GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); + IFSET(ctrl->dbglvl, DBG_RMOVEINFO, + rprintf(ctrl, + "\t[%"PRIDX" %"PRIDX"], [%.4"PRREAL"], [%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"]\n", + pass, c, badmaxpwgt[0], + GlobalSESum(ctrl, nmoved), GlobalSESum(ctrl, nmoves), + GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); /*------------------------------------------------------------- / Time to communicate with processors to send the vertices @@ -517,31 +526,32 @@ void Mc_KWayAdaptiveRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa /-------------------------------------------------------------*/ /* Issue the receives first */ for (i=0; icomm, ctrl->rreq+i); + gkMPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], + IDX_T, peind[i], 1, ctrl->comm, ctrl->rreq+i); /* Issue the sends next. 
This needs some preporcessing */ for (i=0; iimap[supdate[i]]; } - iidxsort(nsupd, supdate); + isorti(nsupd, supdate); for (j=i=0; icomm, ctrl->sreq+i); + gkMPI_Isend((void *)(supdate+j), k-j, IDX_T, peind[i], 1, ctrl->comm, ctrl->sreq+i); j = k; } /* OK, now get into the loop waiting for the send/recv operations to finish */ - MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; istatuses+i, IDX_DATATYPE, nupds_pe+i); - MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + gkMPI_Get_count(ctrl->statuses+i, IDX_T, nupds_pe+i); + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*------------------------------------------------------------- - / Place the recieved to-be updated vertices into update[] + / Place the received to-be updated vertices into update[] /-------------------------------------------------------------*/ for (i=0; idegrees; - your_edegrees = tmp_myrinfo->degrees; + myrinfo = graph->ckrinfo+i; + + if (myrinfo->inbr == -1) + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; - graph->lmincut -= myrinfo->ed; - myrinfo->ndegrees = 0; - myrinfo->id = 0; - myrinfo->ed = 0; + graph->lmincut -= oldEDs[i]; + myrinfo->nnbrs = 0; + myrinfo->id = 0; + myrinfo->ed = 0; for (j=xadj[i]; jed += adjwgt[j]; - for (k=0; kndegrees; k++) { - if (my_edegrees[k].edge == yourdomain) { - my_edegrees[k].ewgt += adjwgt[j]; - your_edegrees[k].ewgt += adjwgt[j]; + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == yourdomain) { + mynbrs[k].ed += adjwgt[j]; break; } } - if (k == myrinfo->ndegrees) { - my_edegrees[k].edge = yourdomain; - my_edegrees[k].ewgt = adjwgt[j]; - your_edegrees[k].edge = yourdomain; - your_edegrees[k].ewgt = adjwgt[j]; - myrinfo->ndegrees++; + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = yourdomain; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; } - ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]); - ASSERT(ctrl, tmp_myrinfo->ndegrees <= 
xadj[i+1]-xadj[i]); - + PASSERT(ctrl, myrinfo->nnbrs <= xadj[i+1]-xadj[i]); } else { myrinfo->id += adjwgt[j]; } } graph->lmincut += myrinfo->ed; - - tmp_myrinfo->id = myrinfo->id; - tmp_myrinfo->ed = myrinfo->ed; - tmp_myrinfo->ndegrees = myrinfo->ndegrees; + oldEDs[i] = myrinfo->ed; /* for the next iteration */ } /* finally, sum-up the partition weights */ - MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, MPI_FLOAT, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, + REAL_T, MPI_SUM, ctrl->comm); } graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; + IFSET(ctrl->dbglvl, DBG_RMOVEINFO, + rprintf(ctrl, "\t\tcut: %"PRIDX"\n", graph->mincut)); + if (graph->mincut == oldcut) break; } - GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM); - GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM); - GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM); - GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM); + WCOREPOP; IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ametis.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ametis.c index e190fc62..7d314a11 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ametis.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ametis.c @@ -8,231 +8,179 @@ * Started 10/19/96 * George * - * $Id: ametis.c,v 1.6 2003/07/25 04:01:03 karypis Exp $ + * $Id: ametis.c 10757 2011-09-15 22:07:47Z karypis $ * */ #include - /*********************************************************************************** * This function is the entry point of the parallel multilevel local diffusion * algorithm. It uses parallel undirected diffusion followed by adaptive k-way * refinement. 
This function utilizes local coarsening. ************************************************************************************/ -void ParMETIS_V3_AdaptiveRepart(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, - idxtype *vwgt, idxtype *vsize, idxtype *adjwgt, int *wgtflag, int *numflag, - int *ncon, int *nparts, float *tpwgts, float *ubvec, float *ipc2redist, - int *options, int *edgecut, idxtype *part, MPI_Comm *comm) +int ParMETIS_V3_AdaptiveRepart(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, + idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, real_t *ipc2redist, + idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm) { - int h, i; - int npes, mype; - CtrlType ctrl; - WorkSpaceType wspace; - GraphType *graph; - int tewgt, tvsize, nmoved, maxin, maxout, vtx_factor; - float gtewgt, gtvsize, avg, maximb; - int ps_relation, seed, dbglvl = 0; - int iwgtflag, inumflag, incon, inparts, ioptions[10]; - float iipc2redist, *itpwgts, iubvec[MAXNCON]; - - MPI_Comm_size(*comm, &npes); - MPI_Comm_rank(*comm, &mype); - - /* Deal with poor vertex distributions */ - ctrl.comm = *comm; - if (GlobalSEMin(&ctrl, vtxdist[mype+1]-vtxdist[mype]) < 1) { - if (mype == 0) - printf("Error: Poor vertex distribution (processor with no vertices).\n"); - return; - } + idx_t i, npes, mype, status; + ctrl_t *ctrl=NULL; + graph_t *graph=NULL; + size_t curmem; + + /* Check the input parameters and return if an error */ + status = CheckInputsAdaptiveRepart(vtxdist, xadj, adjncy, vwgt, vsize, adjwgt, + wgtflag, numflag, ncon, nparts, tpwgts, ubvec, ipc2redist, options, + edgecut, part, comm); + if (GlobalSEMinComm(*comm, status) == 0) + return METIS_ERROR; - /********************************/ - /* Try and take care bad inputs */ - /********************************/ - if (options != NULL && options[0] == 1) - dbglvl = options[PMV3_OPTION_DBGLVL]; - CheckInputs(ADAPTIVE_PARTITION, npes, dbglvl, wgtflag, 
&iwgtflag, numflag, &inumflag, - ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, - ipc2redist, &iipc2redist, options, ioptions, part, comm); + status = METIS_OK; + gk_malloc_init(); + curmem = gk_GetCurMemoryUsed(); - /* ADD: take care of disconnected graph */ + /* Setup the ctrl */ + ctrl = SetupCtrl(PARMETIS_OP_AMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm); + npes = ctrl->npes; + mype = ctrl->mype; - /*********************************/ - /* Take care the nparts = 1 case */ - /*********************************/ - if (inparts == 1) { - idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); + + /* Take care the nparts == 1 case */ + if (*nparts == 1) { + iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part); *edgecut = 0; - return; + goto DONE; } - /**************************/ - /* Set up data structures */ - /**************************/ - if (inumflag == 1) + + /* Setup the graph */ + if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); - /*****************************/ - /* Set up control structures */ - /*****************************/ - if (ioptions[0] == 1) { - dbglvl = ioptions[PMV3_OPTION_DBGLVL]; - seed = ioptions[PMV3_OPTION_SEED]; - ps_relation = (npes == inparts ? ioptions[PMV3_OPTION_PSR] : PARMETIS_PSR_UNCOUPLED); - } - else { - dbglvl = GLOBAL_DBGLVL; - seed = GLOBAL_SEED; - ps_relation = (npes == inparts ? PARMETIS_PSR_COUPLED : PARMETIS_PSR_UNCOUPLED); - } + graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, vsize, adjncy, adjwgt, *wgtflag); - SetUpCtrl(&ctrl, inparts, dbglvl, *comm); - vtx_factor = (amax(npes, inparts) > 256) ? 20 : 50; - ctrl.CoarsenTo = amin(vtxdist[npes]+1, vtx_factor*incon*amax(npes, inparts)); - ctrl.ipc_factor = iipc2redist; - ctrl.redist_factor = 1.0; - ctrl.redist_base = 1.0; - ctrl.seed = (seed == 0 ? 
mype : seed*mype); - ctrl.sync = GlobalSEMax(&ctrl, seed); - ctrl.partType = ADAPTIVE_PARTITION; - ctrl.ps_relation = ps_relation; - ctrl.tpwgts = itpwgts; - - graph = Mc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag); - graph->vsize = (vsize == NULL ? idxsmalloc(graph->nvtxs, 1, "vsize") : vsize); - - graph->home = idxmalloc(graph->nvtxs, "home"); - if (ctrl.ps_relation == PARMETIS_PSR_COUPLED) - idxset(graph->nvtxs, mype, graph->home); + if (ctrl->ps_relation == PARMETIS_PSR_COUPLED) + iset(graph->nvtxs, mype, graph->home); else { /* Downgrade the partition numbers if part[] has more partitions that nparts */ for (i=0; invtxs; i++) - part[i] = (part[i] >= ctrl.nparts ? 0 : part[i]); + part[i] = (part[i] >= ctrl->nparts ? 0 : part[i]); - idxcopy(graph->nvtxs, part, graph->home); + icopy(graph->nvtxs, part, graph->home); } - tewgt = idxsum(graph->nedges, graph->adjwgt); - tvsize = idxsum(graph->nvtxs, graph->vsize); - gtewgt = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs; /* The +1/graph->gnvtxs were added to remove any FPE */ - gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs; - ctrl.edge_size_ratio = gtewgt/gtvsize; - scopy(incon, iubvec, ctrl.ubvec); - AllocateWSpace(&ctrl, graph, &wspace); + /* Allocate the workspace */ + AllocateWSpace(ctrl, 10*graph->nvtxs); + - /***********************/ /* Partition and Remap */ - /***********************/ - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); + STARTTIMER(ctrl, ctrl->TotalTmr); - Adaptive_Partition(&ctrl, graph, &wspace); - ParallelReMapGraph(&ctrl, graph, &wspace); + ctrl->ipc_factor = *ipc2redist; + ctrl->CoarsenTo = gk_min(graph->gnvtxs+1, + (gk_max(npes, *nparts) > 256 ? 
20 : 50)*(*ncon)*gk_max(npes, *nparts)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); + Adaptive_Partition(ctrl, graph); + ParallelReMapGraph(ctrl, graph); + + icopy(graph->nvtxs, graph->where, part); + *edgecut = graph->mincut; + + STOPTIMER(ctrl, ctrl->TotalTmr); - idxcopy(graph->nvtxs, graph->where, part); - if (edgecut != NULL) - *edgecut = graph->mincut; - /***********************/ /* Take care of output */ - /***********************/ - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - - if (ctrl.dbglvl&DBG_INFO) { - Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout); - rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut); - avg = 0.0; - for (h=0; hgnpwgts[i*incon+h]/itpwgts[i*incon+h]); - avg += maximb; - rprintf(&ctrl, "%.3f ", maximb); - } - rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout); - } + IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); + IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, graph, 1)); - /*************************************/ - /* Free memory, renumber, and return */ - /*************************************/ - GKfree((void **)&graph->lnpwgts, &graph->gnpwgts, &graph->nvwgt, &graph->home, - &itpwgts, LTERM); - - FreeInitialGraphAndRemap(graph, iwgtflag, vsize == NULL); - FreeWSpace(&wspace); - FreeCtrl(&ctrl); + FreeInitialGraphAndRemap(graph); - if (inumflag == 1) + if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); - return; +DONE: + FreeCtrl(&ctrl); + if (gk_GetCurMemoryUsed() - curmem > 0) { + printf("ParMETIS appears to have a memory leak of %zdbytes. 
Report this.\n", + (ssize_t)(gk_GetCurMemoryUsed() - curmem)); + } + gk_malloc_cleanup(0); + + return (int)status; } +/*************************************************************************/ +/*! This function is the driver for the adaptive refinement mode of + ParMETIS +*/ +/*************************************************************************/ +void Adaptive_Partition(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i; + idx_t tewgt, tvsize; + real_t gtewgt, gtvsize; + real_t ubavg, lbavg, *lbvec; + WCOREPUSH; -/************************************************************************* -* This function is the driver for the adaptive refinement mode of ParMETIS -**************************************************************************/ -void Adaptive_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) -{ - int i; - int tewgt, tvsize; - float gtewgt, gtvsize; - float ubavg, lbavg, lbvec[MAXNCON]; + lbvec = rwspacemalloc(ctrl, graph->ncon); /************************************/ /* Set up important data structures */ /************************************/ - SetUp(ctrl, graph, wspace); + CommSetup(ctrl, graph); - ubavg = savg(graph->ncon, ctrl->ubvec); - tewgt = idxsum(graph->nedges, graph->adjwgt); - tvsize = idxsum(graph->nvtxs, graph->vsize); - gtewgt = (float) GlobalSESum(ctrl, tewgt) + 1.0/graph->gnvtxs; /* The +1/graph->gnvtxs were added to remove any FPE */ - gtvsize = (float) GlobalSESum(ctrl, tvsize) + 1.0/graph->gnvtxs; + ubavg = ravg(graph->ncon, ctrl->ubvec); + tewgt = isum(graph->nedges, graph->adjwgt, 1); + tvsize = isum(graph->nvtxs, graph->vsize, 1); + gtewgt = (real_t) GlobalSESum(ctrl, tewgt) + 1.0/graph->gnvtxs; /* The +1/graph->gnvtxs were added to remove any FPE */ + gtvsize = (real_t) GlobalSESum(ctrl, tvsize) + 1.0/graph->gnvtxs; ctrl->redist_factor = ctrl->redist_base * ((gtewgt/gtvsize)/ ctrl->edge_size_ratio); - IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6d %8d %5d %5d][%d]\n", + IFSET(ctrl->dbglvl, DBG_PROGRESS, 
rprintf(ctrl, "[%6"PRIDX" %8"PRIDX" %5"PRIDX" %5"PRIDX"][%"PRIDX"]\n", graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo)); if (graph->gnvtxs < 1.3*ctrl->CoarsenTo || (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) { + AllocateRefinementWorkSpace(ctrl, 2*graph->nedges); + /***********************************************/ /* Balance the partition on the coarsest graph */ /***********************************************/ - graph->where = idxsmalloc(graph->nvtxs+graph->nrecv, -1, "graph->where"); - idxcopy(graph->nvtxs, graph->home, graph->where); + graph->where = ismalloc(graph->nvtxs+graph->nrecv, -1, "graph->where"); + icopy(graph->nvtxs, graph->home, graph->where); - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - lbavg = savg(graph->ncon, lbvec); + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + lbavg = ravg(graph->ncon, lbvec); if (lbavg > ubavg + 0.035 && ctrl->partType != REFINE_PARTITION) - Balance_Partition(ctrl, graph, wspace); + Balance_Partition(ctrl, graph); if (ctrl->dbglvl&DBG_PROGRESS) { - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - rprintf(ctrl, "nvtxs: %10d, balance: ", graph->gnvtxs); + ComputePartitionParams(ctrl, graph); + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", + graph->gnvtxs, graph->mincut); for (i=0; incon; i++) - rprintf(ctrl, "%.3f ", lbvec[i]); + rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]); rprintf(ctrl, "\n"); + + /* free memory allocated by ComputePartitionParams */ + gk_free((void **)&graph->ckrinfo, &graph->lnpwgts, &graph->gnpwgts, LTERM); } /* check if no coarsening took place */ if (graph->finer == NULL) { - Mc_ComputePartitionParams(ctrl, graph, wspace); - Mc_KWayBalance(ctrl, graph, wspace, graph->ncon); - Mc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES); + ComputePartitionParams(ctrl, 
graph); + KWayBalance(ctrl, graph, graph->ncon); + KWayAdaptiveRefine(ctrl, graph, NGR_PASSES); } } else { @@ -241,40 +189,47 @@ void Adaptive_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) /*******************************/ switch (ctrl->ps_relation) { case PARMETIS_PSR_COUPLED: - Match_Local(ctrl, graph, wspace); + Match_Local(ctrl, graph); break; case PARMETIS_PSR_UNCOUPLED: default: - Match_Global(ctrl, graph, wspace); + Match_Global(ctrl, graph); break; } - Adaptive_Partition(ctrl, graph->coarser, wspace); + graph_WriteToDisk(ctrl, graph); + + Adaptive_Partition(ctrl, graph->coarser); + + graph_ReadFromDisk(ctrl, graph); /********************************/ /* project partition and refine */ /********************************/ - Mc_ProjectPartition(ctrl, graph, wspace); - Mc_ComputePartitionParams(ctrl, graph, wspace); + ProjectPartition(ctrl, graph); + ComputePartitionParams(ctrl, graph); if (graph->ncon > 1 && graph->level < 4) { - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - lbavg = savg(graph->ncon, lbvec); + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + lbavg = ravg(graph->ncon, lbvec); if (lbavg > ubavg + 0.025) { - Mc_KWayBalance(ctrl, graph, wspace, graph->ncon); + KWayBalance(ctrl, graph, graph->ncon); } } - Mc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES); + KWayAdaptiveRefine(ctrl, graph, NGR_PASSES); if (ctrl->dbglvl&DBG_PROGRESS) { - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut); + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", + graph->gnvtxs, graph->mincut); for (i=0; incon; i++) - rprintf(ctrl, "%.3f ", lbvec[i]); + rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]); rprintf(ctrl, "\n"); } } + + WCOREPOP; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/backcompat.c 
b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/backcompat.c deleted file mode 100644 index 2fb0de83..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/backcompat.c +++ /dev/null @@ -1,517 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * backcompat.c - * - * This file ensures backwards compatability with previous ParMETIS releases - * - * Started 10/19/96 - * George - * - * $Id: backcompat.c,v 1.2 2003/07/21 17:18:48 karypis Exp $ - * - */ - -#include - -/***************************************************************************** -* This function computes a partitioning. -*****************************************************************************/ -void ParMETIS_PartKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm) -{ - int i; - int ncon = 1; - float *tpwgts, ubvec[MAXNCON]; - int myoptions[10]; - - tpwgts = fmalloc(*nparts*ncon, "tpwgts"); - for (i=0; i<*nparts*ncon; i++) - tpwgts[i] = 1.0/(float)(*nparts); - for (i=0; i @@ -17,47 +17,54 @@ /************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ -int BalanceMyLink(CtrlType *ctrl, GraphType *graph, idxtype *home, int me, - int you, float *flows, float maxdiff, float *diff_cost, float *diff_lbavg, - float avgvwgt) +idx_t BalanceMyLink(ctrl_t *ctrl, graph_t *graph, idx_t *home, idx_t me, + idx_t you, real_t *flows, real_t maxdiff, real_t *diff_cost, + real_t *diff_lbavg, real_t avgvwgt) { - int h, i, ii, j, k; - int nvtxs, ncon; - int nqueues, minval, maxval, higain, vtx, edge, totalv; - int from, to, qnum, index, nchanges, cut, tmp; - int pass, nswaps, nmoves, multiplier; - idxtype *xadj, *vsize, *adjncy, *adjwgt, *where, *ed, *id; - idxtype 
*hval, *nvpq, *inq, *map, *rmap, *ptr, *myqueue, *changes; - float *nvwgt, lbvec[MAXNCON], pwgts[MAXNCON*2], tpwgts[MAXNCON*2], my_wgt[MAXNCON]; - float newgain; - float lbavg, bestflow, mycost; - float ipc_factor, redist_factor, ftmp; - FPQueueType *queues; -int mype; -MPI_Comm_rank(MPI_COMM_WORLD, &mype); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - vsize = graph->vsize; + idx_t h, i, ii, j, k, mype; + idx_t nvtxs, ncon; + idx_t nqueues, minval, maxval, higain, vtx, edge, totalv; + idx_t from, to, qnum, index, nchanges, cut, tmp; + idx_t pass, nswaps, nmoves, multiplier; + idx_t *xadj, *vsize, *adjncy, *adjwgt, *where, *ed, *id; + idx_t *hval, *nvpq, *inq, *map, *rmap, *ptr, *myqueue, *changes; + real_t *nvwgt, *lbvec, *pwgts, *tpwgts, *my_wgt; + real_t newgain; + real_t lbavg, bestflow, mycost; + real_t ipc_factor, redist_factor, ftmp; + rpq_t **queues; + + WCOREPUSH; + + gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + nvwgt = graph->nvwgt; + vsize = graph->vsize; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - ipc_factor = ctrl->ipc_factor; + where = graph->where; + + ipc_factor = ctrl->ipc_factor; redist_factor = ctrl->redist_factor; - hval = idxmalloc(nvtxs*7, "hval"); - id = hval + nvtxs; - ed = hval + nvtxs*2; - map = hval + nvtxs*3; - rmap = hval + nvtxs*4; - myqueue = hval + nvtxs*5; - changes = hval + nvtxs*6; + hval = iwspacemalloc(ctrl, nvtxs); + id = iwspacemalloc(ctrl, nvtxs); + ed = iwspacemalloc(ctrl, nvtxs); + map = iwspacemalloc(ctrl, nvtxs); + rmap = iwspacemalloc(ctrl, nvtxs); + myqueue = iwspacemalloc(ctrl, nvtxs); + changes = iwspacemalloc(ctrl, nvtxs); + + lbvec = rwspacemalloc(ctrl, ncon); + pwgts = rset(2*ncon, 0.0, rwspacemalloc(ctrl, 2*ncon)); + tpwgts = rwspacemalloc(ctrl, 2*ncon); + my_wgt = rset(ncon, 0.0, rwspacemalloc(ctrl, ncon)); - sset(ncon*2, 0.0, pwgts); for (h=0; h0; i--) - ptr[i] = 
ptr[i-1]; - ptr[0] = 0; + SHIFTCSR(i, nqueues, ptr); + /* initialize queues */ for (i=0; i 0) { - FPQueueInit(queues+i, nvpq[i]); - FPQueueInit(queues+i+nqueues, nvpq[i]); + queues[i] = rpqCreate(nvpq[i]); + queues[nqueues+i] = rpqCreate(nvpq[i]); } /* compute internal/external degrees */ - idxset(nvtxs, 0, id); - idxset(nvtxs, 0, ed); - for (j=0; j fabs(flows[j])) j = h; - bestflow = fabs(flows[j]); + /* bestflow = rfavg(ncon, flows); */ + for (j=0, h=0; h fabs(flows[j])) + j = h; + } + bestflow = fabs(flows[j]); nchanges = nmoves = 0; for (ii=0; ii avgvwgt) break; + } } else { - for (j=0; j fabs(flows[j])) j = h; - ftmp = fabs(flows[j]); + /* ftmp = rfavg(ncon, flows); */ + for (j=0, h=0; h fabs(flows[j])) + j = h; + } + ftmp = fabs(flows[j]); if (ftmp < bestflow) { bestflow = ftmp; @@ -231,7 +244,7 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); changes[nchanges++] = vtx; } - SWAP(id[vtx], ed[vtx], tmp); + gk_SWAP(id[vtx], ed[vtx], tmp); for (j=xadj[vtx]; j 0) { - FPQueueReset(queues+i); - FPQueueReset(queues+i+nqueues); + rpqReset(queues[i]); + rpqReset(queues[i+nqueues]); } } @@ -286,11 +299,12 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); /***************************/ /* compute 2-way imbalance */ /***************************/ - sset(ncon, 0.0, my_wgt); - for (i=0; i 0) { - FPQueueFree(queues+i); - FPQueueFree(queues+i+nqueues); + rpqDestroy(queues[i]); + rpqDestroy(queues[i+nqueues]); } + } + + WCOREPOP; - GKfree((void **)&hval, (void **)&nvpq, (void **)&ptr, (void **)&inq, (void **)&queues, LTERM); return nswaps; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/comm.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/comm.c index 362e6f70..c89b4d0c 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/comm.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/comm.c @@ -5,21 +5,276 @@ * * This function provides various high level communication functions * - * 
$Id: comm.c,v 1.2 2003/07/21 17:18:48 karypis Exp $ + * $Id: comm.c 10592 2011-07-16 21:17:53Z karypis $ */ #include +/*************************************************************************/ +/*! This function performs the following functions: + - determines the processors that contain adjacent vertices and setup + the infrastructure for efficient communication. + - localizes the numbering of the adjancency lists. +*/ +/**************************************************************************/ +void CommSetup(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, j, k, islocal, penum, gnvtxs, nvtxs, nlocal, firstvtx, lastvtx, + nsend, nrecv, nnbrs, nadj; + idx_t npes=ctrl->npes, mype=ctrl->mype; + idx_t *vtxdist, *xadj, *adjncy; + idx_t *peind, *recvptr, *recvind, *sendptr, *sendind; + idx_t *imap, *lperm; + idx_t *pexadj, *peadjncy, *peadjloc, *startsind; + ikv_t *recvrequests, *sendrequests, *adjpairs; + + if (graph->lperm != NULL) + return; /* The communication structure has already been setup */ + + STARTTIMER(ctrl, ctrl->SetupTmr); + + gnvtxs = graph->gnvtxs; + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + lperm = graph->lperm = iincset(nvtxs, 0, imalloc(nvtxs, "CommSetup: graph->lperm")); + + vtxdist = graph->vtxdist; + firstvtx = vtxdist[mype]; + lastvtx = vtxdist[mype+1]; + + WCOREPUSH; + /************************************************************* + * Determine what you need to receive + *************************************************************/ + /* first pass: determine nadj and interior/interface vertices */ + for (nlocal=0, nadj=0, i=0; i= lastvtx) { /* remote vertex */ + nadj++; + islocal = 0; + } + } + if (islocal) { + lperm[i] = lperm[nlocal]; + lperm[nlocal++] = i; + } + } + graph->nlocal = nlocal; -/************************************************************************* -* This function performs the gather/scatter for the boundary vertices -**************************************************************************/ -void 
CommInterfaceData(CtrlType *ctrl, GraphType *graph, idxtype *data, - idxtype *sendvector, idxtype *recvvector) + adjpairs = ikvwspacemalloc(ctrl, nadj+1); + + /* second pass: rewrite locale entries and populate remote edges */ + for (nadj=0, i=0; i= firstvtx && k < lastvtx) { /* local vertex */ + adjncy[j] = k-firstvtx; + } + else { /* remote vertex */ + adjpairs[nadj].key = k; + adjpairs[nadj++].val = j; + } + } + } + + STARTTIMER(ctrl, ctrl->AuxTmr1); + /* use a sort-based "unique" approach */ + ikvsorti(nadj, adjpairs); + adjpairs[nadj].key = gnvtxs+1; /* boundary condition */ + STOPTIMER(ctrl, ctrl->AuxTmr1); + + /* determine how many distinct vertices you need to receive */ + for (nrecv=0, i=0; inrecv = nrecv; + + + /* allocate space for the to be received vertices part of the recvinfo */ + recvind = graph->recvind = imalloc(nrecv, "CommSetup: recvind"); + + /* store distinct vertices into recvind array and re-write adjncy */ + for (nrecv=0, i=0; inrecv); + + + /* determine the number of neighboring processors */ + for (i=0, nnbrs=0, penum=0; penum= vtxdist[penum+1]) + break; + } + if (j > i) { + nnbrs++; + i = j; + } + } + graph->nnbrs = nnbrs; + + /* Update the ctrl arrays that have to do with p2p communication */ + CommUpdateNnbrs(ctrl, nnbrs); + + /* allocate space for peind/recvptr part of the recvinfo */ + peind = graph->peind = imalloc(nnbrs, "CommSetup: peind"); + recvptr = graph->recvptr = imalloc(nnbrs+1, "CommSetup: recvptr"); + + /* populate the peind/recvptr arrays */ + for (i=0, nnbrs=0, recvptr[0]=0, penum=0; penum= vtxdist[penum+1]) + break; + } + if (j > i) { + peind[nnbrs++] = penum; + recvptr[nnbrs] = j; + i = j; + } + } + PASSERT(ctrl, nnbrs == graph->nnbrs); + + WCOREPOP; + + /* PrintVector(ctrl, nnbrs+1, 0, recvptr, "recvptr"); */ + + + WCOREPUSH; + /************************************************************* + * Determine what you need to send + *************************************************************/ + /* GKTODO - This can be 
replaced via a sparse communication */ + /* Tell the other processors what they need to send you */ + recvrequests = ikvwspacemalloc(ctrl, npes); + sendrequests = ikvwspacemalloc(ctrl, npes); + memset(recvrequests, 0, sizeof(ikv_t)*npes); + for (i=0; icomm); + + /* PrintPairs(ctrl, npes, recvrequests, "recvrequests"); */ + /* PrintPairs(ctrl, npes, sendrequests, "sendrequests"); */ + + startsind = iwspacemalloc(ctrl, nnbrs); + sendptr = graph->sendptr = imalloc(nnbrs+1, "CommSetup: sendptr"); + + for (j=0, i=0; i 0) { + sendptr[j] = sendrequests[i].key; + startsind[j] = sendrequests[i].val; + j++; + } + } + PASSERT(ctrl, j == nnbrs); + + MAKECSR(i, nnbrs, sendptr); + + nsend = graph->nsend = sendptr[nnbrs]; + sendind = graph->sendind = imalloc(nsend, "CommSetup: sendind"); + + + /* Issue the receives for sendind */ + for (i=0; icomm, ctrl->rreq+i); + } + + /* Issue the sends. My recvind[penum] becomes penum's sendind[mype] */ + for (i=0; icomm, ctrl->sreq+i); + } + + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + + + /* Create the peadjncy data structure for sparse boundary exchanges */ + pexadj = graph->pexadj = ismalloc(nvtxs+1, 0, "CommSetup: pexadj"); + peadjncy = graph->peadjncy = imalloc(nsend, "CommSetup: peadjncy"); + peadjloc = graph->peadjloc = imalloc(nsend, "CommSetup: peadjloc"); + + for (i=0; i= firstvtx && sendind[i] < lastvtx, + (ctrl, "%"PRIDX" %"PRIDX" %"PRIDX"\n", sendind[i], firstvtx, lastvtx)); + pexadj[sendind[i]-firstvtx]++; + } + MAKECSR(i, nvtxs, pexadj); + + for (i=0; iimap = imalloc(nvtxs+nrecv, "CommSetup: imap"); + for (i=0; iSetupTmr); + +#ifdef DEBUG_SETUPINFO + rprintf(ctrl, "[%5"PRIDX" %5"PRIDX"] \tl:[%5"PRIDX" %5"PRIDX"] \ts:[%5"PRIDX", %5"PRIDX"] \tr:[%5"PRIDX", %5"PRIDX"]\n", + GlobalSEMin(ctrl, nvtxs), GlobalSEMax(ctrl, nvtxs), + GlobalSEMin(ctrl, nlocal), GlobalSEMax(ctrl, nlocal), + GlobalSEMin(ctrl, nsend), GlobalSEMax(ctrl, nsend), + GlobalSEMin(ctrl, nrecv), 
GlobalSEMax(ctrl, nrecv)); + + PrintSetUpInfo(ctrl, graph); +#endif + +} + + +/*************************************************************************/ +/*! This function updates the sreq/rreq/statuses arrays in ctrl based on + the new number of neighbors. +*/ +/*************************************************************************/ +void CommUpdateNnbrs(ctrl_t *ctrl, idx_t nnbrs) { - int i, k, nnbrs, firstvtx; - idxtype *peind, *sendptr, *sendind, *recvptr, *recvind; + if (ctrl->ncommpes >= nnbrs) + return; + + ctrl->ncommpes = nnbrs; + ctrl->sreq = (MPI_Request *)gk_realloc(ctrl->sreq, sizeof(MPI_Request)*nnbrs, "sreq"); + ctrl->rreq = (MPI_Request *)gk_realloc(ctrl->rreq, sizeof(MPI_Request)*nnbrs, "rreq"); + ctrl->statuses = (MPI_Status *)gk_realloc(ctrl->statuses, sizeof(MPI_Status)*nnbrs, "statuses"); + +} + + +/*************************************************************************/ +/*! This function performs the gather/scatter for the boundary vertices +*/ +/*************************************************************************/ +void CommInterfaceData(ctrl_t *ctrl, graph_t *graph, idx_t *data, + idx_t *recvvector) +{ + idx_t i, k, nnbrs, firstvtx; + idx_t *peind, *sendptr, *sendind, *sendvector, *recvptr, *recvind; + + WCOREPUSH; firstvtx = graph->vtxdist[ctrl->mype]; nnbrs = graph->nnbrs; @@ -31,24 +286,26 @@ void CommInterfaceData(CtrlType *ctrl, GraphType *graph, idxtype *data, /* Issue the receives first */ for (i=0; icomm, ctrl->rreq+i); } /* Issue the sends next */ k = sendptr[nnbrs]; + sendvector = iwspacemalloc(ctrl, k); for (i=0; icomm, ctrl->sreq+i); } /* OK, now get into the loop waiting for the operations to finish */ - MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); - MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + WCOREPOP; } @@ -56,33 +313,36 @@ void CommInterfaceData(CtrlType *ctrl, GraphType *graph, idxtype *data, 
/************************************************************************* * This function performs the gather/scatter for the boundary vertices **************************************************************************/ -void CommChangedInterfaceData(CtrlType *ctrl, GraphType *graph, - int nchanged, idxtype *changed, idxtype *data, - KeyValueType *sendpairs, KeyValueType *recvpairs, idxtype *psendptr) +void CommChangedInterfaceData(ctrl_t *ctrl, graph_t *graph, idx_t nchanged, + idx_t *changed, idx_t *data, ikv_t *sendpairs, ikv_t *recvpairs) { - int i, j, k, n, penum, nnbrs, firstvtx, nrecv; - idxtype *peind, *sendptr, *recvptr, *recvind, *pexadj, *peadjncy, *peadjloc; - KeyValueType *pairs; + idx_t i, j, k, nnbrs, firstvtx, nrecv, penum, nreceived; + idx_t *peind, *sendptr, *recvptr, *recvind, *pexadj, *peadjncy, + *peadjloc, *psendptr; + ikv_t *pairs; + firstvtx = graph->vtxdist[ctrl->mype]; - nnbrs = graph->nnbrs; - nrecv = graph->nrecv; - peind = graph->peind; - sendptr = graph->sendptr; - recvptr = graph->recvptr; - recvind = graph->recvind; - pexadj = graph->pexadj; + nnbrs = graph->nnbrs; + nrecv = graph->nrecv; + peind = graph->peind; + sendptr = graph->sendptr; + recvptr = graph->recvptr; + recvind = graph->recvind; + pexadj = graph->pexadj; peadjncy = graph->peadjncy; peadjloc = graph->peadjloc; /* Issue the receives first */ for (i=0; icomm, ctrl->rreq+i); } if (nchanged != 0) { - idxcopy(ctrl->npes, sendptr, psendptr); + WCOREPUSH; + + psendptr = icopy(nnbrs, sendptr, iwspacemalloc(ctrl, nnbrs)); /* Copy the changed values into the sendvector */ for (i=0; icomm, ctrl->sreq+i); } + + WCOREPOP; } else { for (i=0; icomm, ctrl->sreq+i); + gkMPI_Isend((void *)(sendpairs), 0, IDX_T, peind[i], 1, ctrl->comm, ctrl->sreq+i); } + /* OK, now get into the loop waiting for the operations to finish */ for (i=0; irreq+i, &(ctrl->status)); - MPI_Get_count(&ctrl->status, IDX_DATATYPE, &n); - if (n != 0) { - n = n/2; - pairs = recvpairs+graph->recvptr[i]; - for (k=0; 
krreq+i, &(ctrl->status)); + gkMPI_Get_count(&ctrl->status, IDX_T, &nreceived); + if (nreceived != 0) { + nreceived = nreceived/2; + pairs = recvpairs+graph->recvptr[i]; + for (k=0; ksreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); } @@ -125,11 +388,11 @@ void CommChangedInterfaceData(CtrlType *ctrl, GraphType *graph, /************************************************************************* * This function computes the max of a single element **************************************************************************/ -int GlobalSEMax(CtrlType *ctrl, int value) +idx_t GlobalSEMax(ctrl_t *ctrl, idx_t value) { - int max; + idx_t max; - MPI_Allreduce((void *)&value, (void *)&max, 1, MPI_INT, MPI_MAX, ctrl->comm); + gkMPI_Allreduce((void *)&value, (void *)&max, 1, IDX_T, MPI_MAX, ctrl->comm); return max; } @@ -137,25 +400,35 @@ int GlobalSEMax(CtrlType *ctrl, int value) /************************************************************************* * This function computes the max of a single element **************************************************************************/ -double GlobalSEMaxDouble(CtrlType *ctrl, double value) +idx_t GlobalSEMaxComm(MPI_Comm comm, idx_t value) { - double max; + idx_t max; - MPI_Allreduce((void *)&value, (void *)&max, 1, MPI_DOUBLE, MPI_MAX, ctrl->comm); + gkMPI_Allreduce((void *)&value, (void *)&max, 1, IDX_T, MPI_MAX, comm); return max; } +/************************************************************************* +* This function computes the max of a single element +**************************************************************************/ +idx_t GlobalSEMin(ctrl_t *ctrl, idx_t value) +{ + idx_t min; + + gkMPI_Allreduce((void *)&value, (void *)&min, 1, IDX_T, MPI_MIN, ctrl->comm); + return min; +} /************************************************************************* * This function computes the max of a single element **************************************************************************/ -int 
GlobalSEMin(CtrlType *ctrl, int value) +idx_t GlobalSEMinComm(MPI_Comm comm, idx_t value) { - int min; + idx_t min; - MPI_Allreduce((void *)&value, (void *)&min, 1, MPI_INT, MPI_MIN, ctrl->comm); + gkMPI_Allreduce((void *)&value, (void *)&min, 1, IDX_T, MPI_MIN, comm); return min; } @@ -163,38 +436,47 @@ int GlobalSEMin(CtrlType *ctrl, int value) /************************************************************************* * This function computes the max of a single element **************************************************************************/ -int GlobalSESum(CtrlType *ctrl, int value) +idx_t GlobalSESum(ctrl_t *ctrl, idx_t value) { - int sum; + idx_t sum; - MPI_Allreduce((void *)&value, (void *)&sum, 1, MPI_INT, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)&value, (void *)&sum, 1, IDX_T, MPI_SUM, ctrl->comm); return sum; } - /************************************************************************* * This function computes the max of a single element **************************************************************************/ -float GlobalSEMaxFloat(CtrlType *ctrl, float value) +idx_t GlobalSESumComm(MPI_Comm comm, idx_t value) { - float max; + idx_t min; - MPI_Allreduce((void *)&value, (void *)&max, 1, MPI_FLOAT, MPI_MAX, ctrl->comm); + gkMPI_Allreduce((void *)&value, (void *)&min, 1, IDX_T, MPI_SUM, comm); - return max; + return min; } +/************************************************************************* +* This function computes the max of a single element +**************************************************************************/ +real_t GlobalSEMaxFloat(ctrl_t *ctrl, real_t value) +{ + real_t max; + gkMPI_Allreduce((void *)&value, (void *)&max, 1, REAL_T, MPI_MAX, ctrl->comm); + + return max; +} /************************************************************************* * This function computes the max of a single element **************************************************************************/ -float GlobalSEMinFloat(CtrlType *ctrl, float 
value) +real_t GlobalSEMinFloat(ctrl_t *ctrl, real_t value) { - float min; + real_t min; - MPI_Allreduce((void *)&value, (void *)&min, 1, MPI_FLOAT, MPI_MIN, ctrl->comm); + gkMPI_Allreduce((void *)&value, (void *)&min, 1, REAL_T, MPI_MIN, ctrl->comm); return min; } @@ -202,11 +484,11 @@ float GlobalSEMinFloat(CtrlType *ctrl, float value) /************************************************************************* * This function computes the max of a single element **************************************************************************/ -float GlobalSESumFloat(CtrlType *ctrl, float value) +real_t GlobalSESumFloat(ctrl_t *ctrl, real_t value) { - float sum; + real_t sum; - MPI_Allreduce((void *)&value, (void *)&sum, 1, MPI_FLOAT, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)&value, (void *)&sum, 1, REAL_T, MPI_SUM, ctrl->comm); return sum; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/csrmatch.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/csrmatch.c index ace79984..e0540f01 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/csrmatch.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/csrmatch.c @@ -8,52 +8,49 @@ * Started 7/23/97 * George * - * $Id: csrmatch.c,v 1.2 2003/07/21 17:18:48 karypis Exp $ + * $Id: csrmatch.c 10057 2011-06-02 13:44:44Z karypis $ * */ #include - - /************************************************************************* * This function finds a matching using the HEM heuristic **************************************************************************/ -void CSR_Match_SHEM(MatrixType *matrix, idxtype *match, idxtype *mlist, - idxtype *skip, int ncon) +void CSR_Match_SHEM(matrix_t *matrix, idx_t *match, idx_t *mlist, + idx_t *skip, idx_t ncon) { - int h, i, ii, j; - int nrows, edge, maxidx, count; - float maxwgt; - idxtype *rowptr, *colind; - float *transfer; - KVType *links; - - nrows = matrix->nrows; - rowptr = 
matrix->rowptr; - colind = matrix->colind; + idx_t h, i, ii, j; + idx_t nrows, edge, maxidx, count; + real_t maxwgt; + idx_t *rowptr, *colind; + real_t *transfer; + rkv_t *links; + + nrows = matrix->nrows; + rowptr = matrix->rowptr; + colind = matrix->colind; transfer = matrix->transfer; - idxset(nrows, UNMATCHED, match); + iset(nrows, UNMATCHED, match); - links = (KVType *)GKmalloc(sizeof(KVType)*nrows, "links"); - for (i=0; i + + + +/*************************************************************************/ +/*! This function sets the ctrl_t structure +*/ +/*************************************************************************/ +ctrl_t *SetupCtrl(pmoptype_et optype, idx_t *options, idx_t ncon, idx_t nparts, + real_t *tpwgts, real_t *ubvec, MPI_Comm comm) +{ + idx_t i, j, defopts; + ctrl_t *ctrl; + + ctrl = (ctrl_t *)gk_malloc(sizeof(ctrl_t), "SetupCtrl: ctrl"); + memset((void *)ctrl, 0, sizeof(ctrl_t)); + + + /* communicator-related info */ + MPI_Comm_dup(comm, &(ctrl->gcomm)); + ctrl->comm = ctrl->gcomm; + ctrl->free_comm = 1; + gkMPI_Comm_rank(ctrl->gcomm, &ctrl->mype); + gkMPI_Comm_size(ctrl->gcomm, &ctrl->npes); + + + /* options[]-related info */ + defopts = (options == NULL ? 1 : options[0] == 0); + switch (optype) { + case PARMETIS_OP_KMETIS: + case PARMETIS_OP_GKMETIS: + ctrl->partType = STATIC_PARTITION; + ctrl->ps_relation = -1; + break; + + case PARMETIS_OP_GMETIS: + break; + + case PARMETIS_OP_RMETIS: + ctrl->partType = REFINE_PARTITION; + ctrl->ipc_factor = 1000.0; + ctrl->ps_relation = (defopts ? (ctrl->npes == nparts ? PARMETIS_PSR_COUPLED + : PARMETIS_PSR_UNCOUPLED) + : (ctrl->npes == nparts ? options[PMV3_OPTION_PSR] + : PARMETIS_PSR_UNCOUPLED)); + break; + + case PARMETIS_OP_AMETIS: + ctrl->partType = ADAPTIVE_PARTITION; + ctrl->ps_relation = (defopts ? (ctrl->npes == nparts ? PARMETIS_PSR_COUPLED + : PARMETIS_PSR_UNCOUPLED) + : (ctrl->npes == nparts ? 
options[PMV3_OPTION_PSR] + : PARMETIS_PSR_UNCOUPLED)); + break; + + case PARMETIS_OP_OMETIS: + /* This is handled directly by the code as its parameter passing does not + conform to the options[] style. This will probably be changed once the + changed have been debugged. */ + break; + + case PARMETIS_OP_M2DUAL: + break; + + case PARMETIS_OP_MKMETIS: + break; + } + ctrl->dbglvl = (defopts ? GLOBAL_DBGLVL : options[PMV3_OPTION_DBGLVL]); + ctrl->seed = (defopts ? GLOBAL_SEED : options[PMV3_OPTION_SEED]); + ctrl->sync = GlobalSEMax(ctrl, ctrl->seed); + ctrl->seed = (ctrl->seed == 0 ? ctrl->mype : ctrl->seed*ctrl->mype); + + /* options passed via dbglvl */ + ctrl->dropedges = ctrl->dbglvl&PARMETIS_DBGLVL_DROPEDGES; + ctrl->twohop = ctrl->dbglvl&PARMETIS_DBGLVL_TWOHOP; + ctrl->fast = ctrl->dbglvl&PARMETIS_DBGLVL_FAST; + ctrl->ondisk = ctrl->dbglvl&PARMETIS_DBGLVL_ONDISK; + ctrl->pid = getpid(); + + /* common info */ + ctrl->optype = optype; + ctrl->ncon = ncon; + ctrl->nparts = nparts; + ctrl->redist_factor = 1.0; + ctrl->redist_base = 1.0; + + /* setup tpwgts */ + ctrl->tpwgts = rmalloc(nparts*ncon, "SetupCtrl: tpwgts"); + if (tpwgts) { + rcopy(nparts*ncon, tpwgts, ctrl->tpwgts); + } + else { + for (i=0; itpwgts[i*ncon+j] = 1.0/nparts; + } + } + + /* setup ubvec */ + ctrl->ubvec = rsmalloc(ncon, UNBALANCE_FRACTION, "SetupCtrl: ubvec"); + if (ubvec) + rcopy(ncon, ubvec, ctrl->ubvec); + + /* initialize the various timers */ + InitTimers(ctrl); + + /* initialize the random number generator */ + srand(ctrl->seed); + + return ctrl; +} + + +/*************************************************************************/ +/*! 
This function computes the invtvwgts of a graph and stores them in ctrl +*/ +/*************************************************************************/ +void SetupCtrl_invtvwgts(ctrl_t *ctrl, graph_t *graph) +{ + idx_t j, ncon; + + ncon = graph->ncon; + + ctrl->invtvwgts = rmalloc(ncon, "SetupCtrl_tvwgts: invtvwgts"); + + for (j=0; jinvtvwgts[j] = 1.0/GlobalSESum(ctrl, isum(graph->nvtxs, graph->vwgt+j, ncon)); + +} + + +/*************************************************************************/ +/*! This function de-allocates memory allocated for the control structures +*/ +/*************************************************************************/ +void FreeCtrl(ctrl_t **r_ctrl) +{ + ctrl_t *ctrl = *r_ctrl; + + FreeWSpace(ctrl); + + if (ctrl->free_comm) + gkMPI_Comm_free(&(ctrl->gcomm)); + + gk_free((void **)&ctrl->invtvwgts, + &ctrl->ubvec, &ctrl->tpwgts, + &ctrl->sreq, &ctrl->rreq, &ctrl->statuses, + &ctrl, + LTERM); + + *r_ctrl = NULL; +} + diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/debug.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/debug.c index adcfc9ab..7c356c1b 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/debug.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/debug.c @@ -9,7 +9,7 @@ * Started 10/20/96 * George * - * $Id: debug.c,v 1.2 2003/07/21 17:18:48 karypis Exp $ + * $Id: debug.c 10391 2011-06-23 19:00:08Z karypis $ * */ @@ -19,21 +19,21 @@ /************************************************************************* * This function prints a vector stored in each processor **************************************************************************/ -void PrintVector(CtrlType *ctrl, int n, int first, idxtype *vec, char *title) +void PrintVector(ctrl_t *ctrl, idx_t n, idx_t first, idx_t *vec, char *title) { - int i, penum; + idx_t i, penum; for (penum=0; penumnpes; penum++) { if (ctrl->mype == penum) { if (ctrl->mype == 0) 
- printf("%s\n", title); - printf("\t%3d. ", ctrl->mype); + fprintf(stdout, "%s\n", title); + fprintf(stdout, "\t%3"PRIDX". ", ctrl->mype); for (i=0; icomm); + gkMPI_Barrier(ctrl->comm); } } @@ -41,21 +41,23 @@ void PrintVector(CtrlType *ctrl, int n, int first, idxtype *vec, char *title) /************************************************************************* * This function prints a vector stored in each processor **************************************************************************/ -void PrintVector2(CtrlType *ctrl, int n, int first, idxtype *vec, char *title) +void PrintVector2(ctrl_t *ctrl, idx_t n, idx_t first, idx_t *vec, char *title) { - int i, penum; + idx_t i, penum; for (penum=0; penumnpes; penum++) { if (ctrl->mype == penum) { if (ctrl->mype == 0) printf("%s\n", title); - printf("\t%3d. ", ctrl->mype); + printf("\t%3"PRIDX". ", ctrl->mype); for (i=0; i=KEEP_BIT ? 1 : 0), (vec[i]>=KEEP_BIT ? vec[i]-KEEP_BIT : vec[i])); + printf("[%"PRIDX" %"PRIDX".%"PRIDX"] ", first+i, + (idx_t)(vec[i]>=KEEP_BIT ? 1 : 0), + (idx_t)(vec[i]>=KEEP_BIT ? vec[i]-KEEP_BIT : vec[i])); printf("\n"); fflush(stdout); } - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); } } @@ -63,21 +65,21 @@ void PrintVector2(CtrlType *ctrl, int n, int first, idxtype *vec, char *title) /************************************************************************* * This function prints a vector stored in each processor **************************************************************************/ -void PrintPairs(CtrlType *ctrl, int n, KeyValueType *pairs, char *title) +void PrintPairs(ctrl_t *ctrl, idx_t n, ikv_t *pairs, char *title) { - int i, penum; + idx_t i, penum; for (penum=0; penumnpes; penum++) { if (ctrl->mype == penum) { if (ctrl->mype == 0) printf("%s\n", title); - printf("\t%3d. ", ctrl->mype); + printf("\t%3"PRIDX". 
", ctrl->mype); for (i=0; icomm); + gkMPI_Barrier(ctrl->comm); } } @@ -87,30 +89,30 @@ void PrintPairs(CtrlType *ctrl, int n, KeyValueType *pairs, char *title) * This function prints the local portion of the graph stored at each * processor **************************************************************************/ -void PrintGraph(CtrlType *ctrl, GraphType *graph) +void PrintGraph(ctrl_t *ctrl, graph_t *graph) { - int i, j, penum; - int firstvtx; + idx_t i, j, penum; + idx_t firstvtx; - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); firstvtx = graph->vtxdist[ctrl->mype]; for (penum=0; penumnpes; penum++) { if (ctrl->mype == penum) { - printf("\t%d", penum); + printf("\t%"PRIDX"", penum); for (i=0; invtxs; i++) { if (i==0) - printf("\t%2d %2d\t", firstvtx+i, graph->vwgt[i]); + printf("\t%2"PRIDX" %2"PRIDX"\t", firstvtx+i, graph->vwgt[i]); else - printf("\t\t%2d %2d\t", firstvtx+i, graph->vwgt[i]); + printf("\t\t%2"PRIDX" %2"PRIDX"\t", firstvtx+i, graph->vwgt[i]); for (j=graph->xadj[i]; jxadj[i+1]; j++) - printf("[%d %d] ", graph->adjncy[j], graph->adjwgt[j]); + printf("[%"PRIDX" %"PRIDX"] ", graph->adjncy[j], graph->adjwgt[j]); printf("\n"); } fflush(stdout); } - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); } } @@ -119,30 +121,30 @@ void PrintGraph(CtrlType *ctrl, GraphType *graph) * This function prints the local portion of the graph stored at each * processor along with degree information during refinement **************************************************************************/ -void PrintGraph2(CtrlType *ctrl, GraphType *graph) +void PrintGraph2(ctrl_t *ctrl, graph_t *graph) { - int i, j, penum; - int firstvtx; + idx_t i, j, penum; + idx_t firstvtx; - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); firstvtx = graph->vtxdist[ctrl->mype]; for (penum=0; penumnpes; penum++) { if (ctrl->mype == penum) { - printf("\t%d", penum); + printf("\t%"PRIDX"", penum); for (i=0; invtxs; i++) { if (i==0) - printf("\t%2d %2d [%d %d %d]\t", firstvtx+i, 
graph->vwgt[i], graph->where[i], graph->rinfo[i].id, graph->rinfo[i].ed); + printf("\t%2"PRIDX" %2"PRIDX" [%"PRIDX" %"PRIDX" %"PRIDX"]\t", firstvtx+i, graph->vwgt[i], graph->where[i], graph->ckrinfo[i].id, graph->ckrinfo[i].ed); else - printf("\t\t%2d %2d [%d %d %d]\t", firstvtx+i, graph->vwgt[i], graph->where[i], graph->rinfo[i].id, graph->rinfo[i].ed); + printf("\t\t%2"PRIDX" %2"PRIDX" [%"PRIDX" %"PRIDX" %"PRIDX"]\t", firstvtx+i, graph->vwgt[i], graph->where[i], graph->ckrinfo[i].id, graph->ckrinfo[i].ed); for (j=graph->xadj[i]; jxadj[i+1]; j++) - printf("[%d %d] ", graph->adjncy[j], graph->adjwgt[j]); + printf("[%"PRIDX" %"PRIDX"] ", graph->adjncy[j], graph->adjwgt[j]); printf("\n"); } fflush(stdout); } - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); } } @@ -150,32 +152,32 @@ void PrintGraph2(CtrlType *ctrl, GraphType *graph) /************************************************************************* * This function prints the information computed during setup **************************************************************************/ -void PrintSetUpInfo(CtrlType *ctrl, GraphType *graph) +void PrintSetUpInfo(ctrl_t *ctrl, graph_t *graph) { - int i, j, penum; + idx_t i, j, penum; - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); for (penum=0; penumnpes; penum++) { if (ctrl->mype == penum) { - printf("PE: %d, nnbrs: %d\n", ctrl->mype, graph->nnbrs); + printf("PE: %"PRIDX", nnbrs: %"PRIDX"\n", ctrl->mype, graph->nnbrs); printf("\tSending...\n"); for (i=0; innbrs; i++) { - printf("\t\tTo: %d: ", graph->peind[i]); + printf("\t\tTo: %"PRIDX": ", graph->peind[i]); for (j=graph->sendptr[i]; jsendptr[i+1]; j++) - printf("%d ", graph->sendind[j]); + printf("%"PRIDX" ", graph->sendind[j]); printf("\n"); } printf("\tReceiving...\n"); for (i=0; innbrs; i++) { - printf("\t\tFrom: %d: ", graph->peind[i]); + printf("\t\tFrom: %"PRIDX": ", graph->peind[i]); for (j=graph->recvptr[i]; jrecvptr[i+1]; j++) - printf("%d ", graph->recvind[j]); + printf("%"PRIDX" ", 
graph->recvind[j]); printf("\n"); } printf("\n"); } - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); } } @@ -184,22 +186,22 @@ void PrintSetUpInfo(CtrlType *ctrl, GraphType *graph) /************************************************************************* * This function prints information about the graphs that were sent/received **************************************************************************/ -void PrintTransferedGraphs(CtrlType *ctrl, int nnbrs, idxtype *peind, idxtype *slens, - idxtype *rlens, idxtype *sgraph, idxtype *rgraph) +void PrintTransferedGraphs(ctrl_t *ctrl, idx_t nnbrs, idx_t *peind, + idx_t *slens, idx_t *rlens, idx_t *sgraph, idx_t *rgraph) { - int i, ii, jj, ll, penum; + idx_t i, ii, jj, ll, penum; - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); for (penum=0; penumnpes; penum++) { if (ctrl->mype == penum) { - printf("PE: %d, nnbrs: %d", ctrl->mype, nnbrs); + printf("PE: %"PRIDX", nnbrs: %"PRIDX"", ctrl->mype, nnbrs); for (ll=i=0; i 0) { - printf("\n\tTo %d\t", peind[i]); + printf("\n\tTo %"PRIDX"\t", peind[i]); for (ii=slens[i]; ii 0) { - printf("\n\tFrom %d\t", peind[i]); + printf("\n\tFrom %"PRIDX"\t", peind[i]); for (ii=rlens[i]; iicomm); + gkMPI_Barrier(ctrl->comm); } } @@ -229,18 +231,18 @@ void PrintTransferedGraphs(CtrlType *ctrl, int nnbrs, idxtype *peind, idxtype *s /************************************************************************* * This function writes a graph in the format used by serial METIS **************************************************************************/ -void WriteMetisGraph(int nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt) +void WriteMetisGraph(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *adjwgt) { - int i, j; + idx_t i, j; FILE *fp; fp = fopen("test.graph", "w"); - fprintf(fp, "%d %d 11", nvtxs, xadj[nvtxs]/2); + fprintf(fp, "%"PRIDX" %"PRIDX" 11", nvtxs, xadj[nvtxs]/2); for (i=0; invtxs; - xadj = graph->xadj; + idx_t i, ii, j, jj, k, l; + idx_t 
nvtxs, nrows; + idx_t *xadj, *adjncy, *where; + idx_t *rowptr, *colind; + idx_t *pcounts, *perm, *marker; + real_t *values; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; adjncy = graph->adjncy; - where = graph->where; + where = graph->where; - nrows = matrix->nrows; + nrows = matrix->nrows; rowptr = matrix->rowptr; colind = matrix->colind; values = matrix->values; - perm = workspace; - marker = idxset(nrows, -1, workspace+nvtxs); - pcounts = idxset(nrows+1, 0, workspace+nvtxs+nrows); + perm = workspace; + marker = iset(nrows, -1, workspace+nvtxs); + pcounts = iset(nrows+1, 0, workspace+nvtxs+nrows); for (i=0; innzs = rowptr[nrows]; @@ -84,23 +84,23 @@ void SetUpConnectGraph(GraphType *graph, MatrixType *matrix, idxtype *workspace) * This function computes movement statistics for adaptive refinement * schemes **************************************************************************/ -void Mc_ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout) +void Mc_ComputeMoveStatistics(ctrl_t *ctrl, graph_t *graph, idx_t *nmoved, idx_t *maxin, idx_t *maxout) { - int i, nvtxs, nparts, myhome; - idxtype *vwgt, *where; - idxtype *lend, *gend, *lleft, *gleft, *lstart, *gstart; + idx_t i, nvtxs, nparts, myhome; + idx_t *vwgt, *where; + idx_t *lend, *gend, *lleft, *gleft, *lstart, *gstart; nvtxs = graph->nvtxs; vwgt = graph->vwgt; where = graph->where; nparts = ctrl->nparts; - lstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lstart"); - gstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gstart"); - lleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lleft"); - gleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gleft"); - lend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lend"); - gend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gend"); + lstart = ismalloc(nparts, 0, "ComputeMoveStatistics: lstart"); + gstart = ismalloc(nparts, 0, "ComputeMoveStatistics: gstart"); + lleft = ismalloc(nparts, 0, "ComputeMoveStatistics: 
lleft"); + gleft = ismalloc(nparts, 0, "ComputeMoveStatistics: gleft"); + lend = ismalloc(nparts, 0, "ComputeMoveStatistics: lend"); + gend = ismalloc(nparts, 0, "ComputeMoveStatistics: gend"); for (i=0; ips_relation == PARMETIS_PSR_COUPLED) ? ctrl->mype : graph->home[i]; @@ -112,26 +112,27 @@ void Mc_ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int /* PrintVector(ctrl, ctrl->npes, 0, lend, "Lend: "); */ - MPI_Allreduce((void *)lstart, (void *)gstart, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); - MPI_Allreduce((void *)lleft, (void *)gleft, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); - MPI_Allreduce((void *)lend, (void *)gend, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lstart, (void *)gstart, nparts, IDX_T, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lleft, (void *)gleft, nparts, IDX_T, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lend, (void *)gend, nparts, IDX_T, MPI_SUM, ctrl->comm); - *nmoved = idxsum(nparts, gleft); - *maxout = gleft[idxamax(nparts, gleft)]; + *nmoved = isum(nparts, gleft, 1); + *maxout = imax(nparts, gleft, 1); for (i=0; invtxs; i++) { if (graph->where[i] != home[i]) @@ -142,28 +143,27 @@ int Mc_ComputeSerialTotalV(GraphType *graph, idxtype *home) } - /************************************************************************* * This function computes the load for each subdomain **************************************************************************/ -void ComputeLoad(GraphType *graph, int nparts, float *load, float *tpwgts, int index) +void ComputeLoad(graph_t *graph, idx_t nparts, real_t *load, real_t *tpwgts, idx_t index) { - int i; - int nvtxs, ncon; - idxtype *where; - float *nvwgt; + idx_t i; + idx_t nvtxs, ncon; + idx_t *where; + real_t *nvwgt; nvtxs = graph->nvtxs; ncon = graph->ncon; where = graph->where; nvwgt = graph->nvwgt; - sset(nparts, 0.0, load); + rset(nparts, 0.0, load); for (i=0; inrows; + n = A->nrows; rowptr = A->rowptr; colind = A->colind; values = A->values; @@ 
-209,9 +209,9 @@ void ConjGrad2(MatrixType *A, float *b, float *x, float tol, float *workspace) for (i=0; i 0.0) { - error = snorm2(n, r) / bnrm2; + error = rnorm2(n, r, 1) / bnrm2; if (error > tol) { /* Begin Iterations */ @@ -219,10 +219,10 @@ void ConjGrad2(MatrixType *A, float *b, float *x, float tol, float *workspace) for (i=0; inrows; i++) w[i] = 0.0; @@ -273,12 +273,12 @@ void mvMult2(MatrixType *A, float *v, float *w) /************************************************************************* * This function sets up the transfer vectors **************************************************************************/ -void ComputeTransferVector(int ncon, MatrixType *matrix, float *solution, - float *transfer, int index) +void ComputeTransferVector(idx_t ncon, matrix_t *matrix, real_t *solution, + real_t *transfer, idx_t index) { - int j, k; - int nrows; - idxtype *rowptr, *colind; + idx_t j, k; + idx_t nrows; + idx_t *rowptr, *colind; nrows = matrix->nrows; rowptr = matrix->rowptr; diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/fpqueue.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/fpqueue.c deleted file mode 100644 index 772f78c8..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/fpqueue.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * fpqueue.c - * - * This file contains functions for manipulating the bucket list - * representation of the gains associated with each vertex in a graph. 
- * These functions are used by the refinement algorithms - * - * Started 9/2/94 - * George - * - * $Id: fpqueue.c,v 1.2 2003/07/21 17:18:48 karypis Exp $ - * - */ - -#include - - -/************************************************************************* -* This function initializes the data structures of the priority queue -**************************************************************************/ -void FPQueueInit(FPQueueType *queue, int maxnodes) -{ - queue->nnodes = 0; - queue->maxnodes = maxnodes; - queue->heap = NULL; - queue->locator = NULL; - - queue->heap = (FKeyValueType *) malloc(sizeof(FKeyValueType)*maxnodes); - queue->locator = (idxtype *) malloc(sizeof(idxtype)*maxnodes); - - idxset(maxnodes, -1, queue->locator); - -} - - -/************************************************************************* -* This function resets the buckets -**************************************************************************/ -void FPQueueReset(FPQueueType *queue) -{ - queue->nnodes = 0; - - idxset(queue->maxnodes, -1, queue->locator); - -} - - -/************************************************************************* -* This function frees the buckets -**************************************************************************/ -void FPQueueFree(FPQueueType *queue) -{ - - GKfree((void **)&queue->heap, &queue->locator, LTERM); - - queue->maxnodes = 0; -} - - -/************************************************************************* -* This function returns the number of nodes in the queue -**************************************************************************/ -int FPQueueGetSize(FPQueueType *queue) -{ - return queue->nnodes; -} - - -/************************************************************************* -* This function adds a node of certain gain into a partition -**************************************************************************/ -int FPQueueInsert(FPQueueType *queue, int node, float gain) -{ - int i, j; - idxtype *locator; - FKeyValueType *heap; - 
- ASSERTS(CheckHeapFloat(queue)); - - heap = queue->heap; - locator = queue->locator; - - ASSERTS(locator[node] == -1); - - i = queue->nnodes++; - while (i > 0) { - j = (i-1)/2; - if (heap[j].key < gain) { - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - ASSERTS(i >= 0); - heap[i].key = gain; - heap[i].val = node; - locator[node] = i; - - ASSERTS(CheckHeapFloat(queue)); - - return 0; -} - - -/************************************************************************* -* This function deletes a node from a partition and reinserts it with -* an updated gain -**************************************************************************/ -int FPQueueDelete(FPQueueType *queue, int node) -{ - int i, j; - float newgain, oldgain; - idxtype *locator; - FKeyValueType *heap; - - heap = queue->heap; - locator = queue->locator; - - ASSERTS(locator[node] != -1); - ASSERTS(heap[locator[node]].val == node); - - ASSERTS(CheckHeapFloat(queue)); - - i = locator[node]; - locator[node] = -1; - - if (--queue->nnodes > 0 && heap[queue->nnodes].val != node) { - node = heap[queue->nnodes].val; - newgain = heap[queue->nnodes].key; - oldgain = heap[i].key; - - if (oldgain < newgain) { - /* Filter-up */ - while (i > 0) { - j = (i-1)>>1; - if (heap[j].key < newgain) { - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - } - else { - /* Filter down */ - while ((j=2*i+1) < queue->nnodes) { - if (heap[j].key > newgain) { - if (j+1 < queue->nnodes && heap[j+1].key > heap[j].key) - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else if (j+1 < queue->nnodes && heap[j+1].key > newgain) { - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - } - - heap[i].key = newgain; - heap[i].val = node; - locator[node] = i; - } - - ASSERTS(CheckHeapFloat(queue)); - - return 0; -} - - -/************************************************************************* -* This function deletes a 
node from a partition and reinserts it with -* an updated gain -**************************************************************************/ -int FPQueueUpdate(FPQueueType *queue, int node, float newgain) -{ - int i, j; - idxtype *locator; - FKeyValueType *heap; - float oldgain; - - heap = queue->heap; - locator = queue->locator; - - ASSERTS(locator[node] != -1); - ASSERTS(heap[locator[node]].val == node); - ASSERTS(CheckHeapFloat(queue)); - - oldgain = heap[locator[node]].key; - - if (fabs(oldgain - newgain) < SMALLFLOAT) - return 0; - - i = locator[node]; - - if (oldgain < newgain) { - /* Filter-up */ - while (i > 0) { - j = (i-1)>>1; - if (heap[j].key < newgain) { - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - } - else { - /* Filter down */ - while ((j=2*i+1) < queue->nnodes) { - if (heap[j].key > newgain) { - if (j+1 < queue->nnodes && heap[j+1].key > heap[j].key) - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else if (j+1 < queue->nnodes && heap[j+1].key > newgain) { - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - } - - heap[i].key = newgain; - heap[i].val = node; - locator[node] = i; - - ASSERTS(CheckHeapFloat(queue)); - - return 0; -} - - - -/************************************************************************* -* This function returns the vertex with the largest gain from a partition -* and removes the node from the bucket list -**************************************************************************/ -int FPQueueGetMax(FPQueueType *queue) -{ - int vtx, i, j, node; - float gain; - idxtype *locator; - FKeyValueType *heap; - - if (queue->nnodes == 0) - return -1; - - queue->nnodes--; - - heap = queue->heap; - locator = queue->locator; - - vtx = heap[0].val; - locator[vtx] = -1; - - if ((i = queue->nnodes) > 0) { - gain = heap[i].key; - node = heap[i].val; - i = 0; - while ((j=2*i+1) < queue->nnodes) { - if (heap[j].key > gain) { - if (j+1 < 
queue->nnodes && heap[j+1].key > heap[j].key) - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else if (j+1 < queue->nnodes && heap[j+1].key > gain) { - j = j+1; - heap[i] = heap[j]; - locator[heap[i].val] = i; - i = j; - } - else - break; - } - - heap[i].key = gain; - heap[i].val = node; - locator[node] = i; - } - - ASSERTS(CheckHeapFloat(queue)); - return vtx; -} - - -/************************************************************************* -* This function returns the vertex with the largest gain from a partition -**************************************************************************/ -int FPQueueSeeMaxVtx(FPQueueType *queue) -{ - int vtx; - - if (queue->nnodes == 0) - return -1; - - vtx = queue->heap[0].val; - - return vtx; -} - - -/************************************************************************* -* This function returns the vertex with the largest gain from a partition -**************************************************************************/ -float FPQueueSeeMaxGain(FPQueueType *queue) -{ - float gain; - - if (queue->nnodes == 0) - return 0.0; - - gain = queue->heap[0].key; - - return gain; -} - - -/************************************************************************* -* This function returns the vertex with the largest gain from a partition -**************************************************************************/ -float FPQueueGetKey(FPQueueType *queue) -{ - int key; - - if (queue->nnodes == 0) - return -1; - - key = queue->heap[0].key; - - return key; -} - -/************************************************************************* -* This function returns the number of nodes in the queue -**************************************************************************/ -int FPQueueGetQSize(FPQueueType *queue) -{ - return queue->nnodes; -} - - - - - - -/************************************************************************* -* This functions checks the consistency of the heap 
-**************************************************************************/ -int CheckHeapFloat(FPQueueType *queue) -{ - int i, j, nnodes; - idxtype *locator; - FKeyValueType *heap; - - return 1; - - heap = queue->heap; - locator = queue->locator; - nnodes = queue->nnodes; - - if (nnodes == 0) - return 1; - - ASSERTS(locator[heap[0].val] == 0); - for (i=1; imaxnodes; i++) { - if (locator[i] != -1) - j++; - } - ASSERTS(j == nnodes); - - return 1; -} diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/frename.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/frename.c index 551dd596..4ad61727 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/frename.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/frename.c @@ -7,26 +7,21 @@ * Started 6/1/98 * George * - * $Id: frename.c,v 1.4 2003/07/30 18:37:58 karypis Exp $ + * $Id: frename.c 13945 2013-03-30 14:38:24Z karypis $ * */ #include - /************************************************************************* * Renaming macro (at least to save some typing :)) **************************************************************************/ #define FRENAME(name0, name1, name2, name3, name4, dargs, cargs) \ - void name1 dargs { name0 cargs; } \ - void name2 dargs { name0 cargs; } \ - void name3 dargs { name0 cargs; } \ - void name4 dargs { name0 cargs; } - - - - + int name1 dargs {MPI_Comm comm = MPI_Comm_f2c(*icomm); return name0 cargs; }\ + int name2 dargs {MPI_Comm comm = MPI_Comm_f2c(*icomm); return name0 cargs; }\ + int name3 dargs {MPI_Comm comm = MPI_Comm_f2c(*icomm); return name0 cargs; }\ + int name4 dargs {MPI_Comm comm = MPI_Comm_f2c(*icomm); return name0 cargs; } @@ -39,12 +34,12 @@ FRENAME(ParMETIS_V3_AdaptiveRepart, parmetis_v3_adaptiverepart, parmetis_v3_adaptiverepart_, parmetis_v3_adaptiverepart__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *vsize, idxtype 
*adjwgt, int *wgtflag, int *numflag, int *ncon, - int *nparts, float *tpwgts, float *ubvec, float *ipc2redist, - int *options, int *edgecut, idxtype *part, MPI_Comm *comm), + (idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *vsize, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, + idx_t *nparts, real_t *tpwgts, real_t *ubvec, real_t *ipc2redist, + idx_t *options, idx_t *edgecut, idx_t *part, MPI_Fint *icomm), (vtxdist, xadj, adjncy, vwgt, vsize, adjwgt, wgtflag, numflag, ncon, - nparts, tpwgts, ubvec, ipc2redist, options, edgecut, part, comm) + nparts, tpwgts, ubvec, ipc2redist, options, edgecut, part, &comm) ) FRENAME(ParMETIS_V3_PartGeomKway, @@ -52,12 +47,12 @@ FRENAME(ParMETIS_V3_PartGeomKway, parmetis_v3_partgeomkway, parmetis_v3_partgeomkway_, parmetis_v3_partgeomkway__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, float *xyz, - int *ncon, int *nparts, float *tpwgts, float *ubvec, int *options, - int *edgecut, idxtype *part, MPI_Comm *comm), + (idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ndims, real_t *xyz, + idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part, MPI_Fint *icomm), (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, ndims, xyz, - ncon, nparts, tpwgts, ubvec, options, edgecut, part, comm) + ncon, nparts, tpwgts, ubvec, options, edgecut, part, &comm) ) FRENAME(ParMETIS_V3_PartGeom, @@ -65,8 +60,8 @@ FRENAME(ParMETIS_V3_PartGeom, parmetis_v3_partgeom, parmetis_v3_partgeom_, parmetis_v3_partgeom__, - (idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm), - (vtxdist, ndims, xyz, part, comm) + (idx_t *vtxdist, idx_t *ndims, real_t *xyz, idx_t *part, MPI_Fint *icomm), + (vtxdist, ndims, xyz, part, &comm) ) FRENAME(ParMETIS_V3_PartKway, @@ -74,11 +69,11 @@ FRENAME(ParMETIS_V3_PartKway, 
parmetis_v3_partkway, parmetis_v3_partkway_, parmetis_v3_partkway__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *ncon, int *nparts, float *tpwgts, float *ubvec, - int *options, int *edgecut, idxtype *part, MPI_Comm *comm), + (idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *adjwgt, + idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, + idx_t *options, idx_t *edgecut, idx_t *part, MPI_Fint *icomm), (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, ncon, nparts, tpwgts, - ubvec, options, edgecut, part, comm) + ubvec, options, edgecut, part, &comm) ) FRENAME(ParMETIS_V3_Mesh2Dual, @@ -86,9 +81,9 @@ FRENAME(ParMETIS_V3_Mesh2Dual, parmetis_v3_mesh2dual, parmetis_v3_mesh2dual_, parmetis_v3_mesh2dual__, - (idxtype *elmdist, idxtype *eptr, idxtype *eind, int *numflag, int *ncommonnodes, - idxtype **xadj, idxtype **adjncy, MPI_Comm *comm), - (elmdist, eptr, eind, numflag, ncommonnodes, xadj, adjncy, comm) + (idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *numflag, idx_t *ncommonnodes, + idx_t **xadj, idx_t **adjncy, MPI_Fint *icomm), + (elmdist, eptr, eind, numflag, ncommonnodes, xadj, adjncy, &comm) ) FRENAME(ParMETIS_V3_PartMeshKway, @@ -96,11 +91,11 @@ FRENAME(ParMETIS_V3_PartMeshKway, parmetis_v3_partmeshkway, parmetis_v3_partmeshkway_, parmetis_v3_partmeshkway__, - (idxtype *elmdist, idxtype *eptr, idxtype *eind, idxtype *elmwgt, int *wgtflag, - int *numflag, int *ncon, int *ncommonnodes, int *nparts, float *tpwgts, - float *ubvec, int *options, int *edgecut, idxtype *part, MPI_Comm *comm), + (idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *elmwgt, idx_t *wgtflag, + idx_t *numflag, idx_t *ncon, idx_t *ncommonnodes, idx_t *nparts, real_t *tpwgts, + real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, MPI_Fint *icomm), (elmdist, eptr, eind, elmwgt, wgtflag, numflag, ncon, ncommonnodes, nparts, tpwgts, - ubvec, options, edgecut, 
part, comm) + ubvec, options, edgecut, part, &comm) ) FRENAME(ParMETIS_V3_NodeND, @@ -108,9 +103,9 @@ FRENAME(ParMETIS_V3_NodeND, parmetis_v3_nodend, parmetis_v3_nodend_, parmetis_v3_nodend__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, - idxtype *order, idxtype *sizes, MPI_Comm *comm), - (vtxdist, xadj, adjncy, numflag, options, order, sizes, comm) + (idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *numflag, idx_t *options, + idx_t *order, idx_t *sizes, MPI_Fint *icomm), + (vtxdist, xadj, adjncy, numflag, options, order, sizes, &comm) ) FRENAME(ParMETIS_V3_RefineKway, @@ -118,205 +113,10 @@ FRENAME(ParMETIS_V3_RefineKway, parmetis_v3_refinekway, parmetis_v3_refinekway_, parmetis_v3_refinekway__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *ncon, int *nparts, float *tpwgts, float *ubvec, - int *options, int *edgecut, idxtype *part, MPI_Comm *comm), + (idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *adjwgt, + idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, + idx_t *options, idx_t *edgecut, idx_t *part, MPI_Fint *icomm), (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, ncon, nparts, tpwgts, - ubvec, options, edgecut, part, comm) -) - - -/************************************************************************* -* Renames for Release 2.0 API -**************************************************************************/ -FRENAME(ParMETIS_PartKway, - PARMETIS_PARTKWAY, - parmetis_partkway, - parmetis_partkway_, - parmetis_partkway__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part, - MPI_Comm *comm), - (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, options, edgecut, - part, comm) -) - -FRENAME(ParMETIS_PartGeomKway, - PARMETIS_PARTGEOMKWAY, - 
parmetis_partgeomkway, - parmetis_partgeomkway_, - parmetis_partgeomkway__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *ndims, float *xyz, int *nparts, int *options, - int *edgecut, idxtype *part, MPI_Comm *comm), - (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, ndims, xyz, nparts, options, - edgecut, part, comm) -) - -FRENAME(ParMETIS_PartGeom, - PARMETIS_PARTGEOM, - parmetis_partgeom, - parmetis_partgeom_, - parmetis_partgeom__, - (idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm), - (vtxdist, ndims, xyz, part, comm) -) - -FRENAME(ParMETIS_PartGeomRefine, - PARMETIS_PARTGEOMREFINE, - parmetis_partgeomrefine, - parmetis_partgeomrefine_, - parmetis_partgeomrefine__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *ndims, float *xyz, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm), - (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, ndims, xyz, options, - edgecut, part, comm) -) - -FRENAME(ParMETIS_RefineKway, - PARMETIS_REFINEKWAY, - parmetis_refinekway, - parmetis_refinekway_, - parmetis_refinekway__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm), - (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, options, edgecut, part, comm) -) - -FRENAME(ParMETIS_RepartLDiffusion, - PARMETIS_REPARTLDIFUSSION, - parmetis_repartldiffusion, - parmetis_repartldiffusion_, - parmetis_repartldiffusion__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm), - (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, options, edgecut, part, comm) -) - -FRENAME(ParMETIS_RepartGDiffusion, - PARMETIS_REPARTGDIFFUSION, - 
parmetis_repartgdiffusion, - parmetis_repartgdiffusion_, - parmetis_repartgdiffusion__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm), - (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, options, edgecut, part, comm) -) - -FRENAME(ParMETIS_RepartRemap, - PARMETIS_REPARTREMAP, - parmetis_repartremap, - parmetis_repartremap_, - parmetis_repartremap__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm), - (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, options, edgecut, part, comm) -) - -FRENAME(ParMETIS_RepartMLRemap, - PARMETIS_REPARTMLREMAP, - parmetis_repartmlremap, - parmetis_repartmlremap_, - parmetis_repartmlremap__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm), - (vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, options, edgecut, part, comm) -) - -FRENAME(ParMETIS_NodeND, - PARMETIS_NODEND, - parmetis_nodend, - parmetis_nodend_, - parmetis_nodend__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, - idxtype *order, idxtype *sizes, MPI_Comm *comm), - (vtxdist, xadj, adjncy, numflag, options, order, sizes, comm) -) - -FRENAME(ParMETIS_SerialNodeND, - PARMETIS_SERIALNODEND, - parmetis_serialnodend, - parmetis_serialnodend_, - parmetis_serialnodend__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, - idxtype *order, idxtype *sizes, MPI_Comm *comm), - (vtxdist, xadj, adjncy, numflag, options, order, sizes, comm) -) - - - - -/************************************************************************* -* Renames for Release 1.0 API -**************************************************************************/ 
-FRENAME(PARKMETIS, - PARKMETIS_, - parkmetis, - parkmetis_, - parkmetis__, - (idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - idxtype *part, int *options, MPI_Comm comm), - (vtxdist, xadj, vwgt, adjncy, adjwgt, part, options, comm) -) - -FRENAME(PARGKMETIS, - PARGKMETIS_, - pargkmetis, - pargkmetis_, - pargkmetis__, - (idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - int ndims, float *xyz, idxtype *part, int *options, MPI_Comm comm), - (vtxdist, xadj, vwgt, adjncy, adjwgt, ndims, xyz, part, options, comm) -) - -FRENAME(PARGRMETIS, - PARGRMETIS_, - pargrmetis, - pargrmetis_, - pargrmetis__, - (idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - int ndims, float *xyz, idxtype *part, int *options, MPI_Comm comm), - (vtxdist, xadj, vwgt, adjncy, adjwgt, ndims, xyz, part, options, comm) -) - -FRENAME(PARGMETIS, - PARGMETIS_, - pargmetis, - pargmetis_, - pargmetis__, - (idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int ndims, float *xyz, - idxtype *part, int *options, MPI_Comm comm), - (vtxdist, xadj, adjncy, ndims, xyz, part, options, comm) -) - -FRENAME(PARRMETIS, - PARRMETIS_, - parrmetis, - parrmetis_, - parrmetis__, - (idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - idxtype *part, int *options, MPI_Comm comm), - (vtxdist, xadj, vwgt, adjncy, adjwgt, part, options, comm) -) - -FRENAME(PARUAMETIS, - PARUAMETIS_, - paruametis, - paruametis_, - paruametis__, - (idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - idxtype *part, int *options, MPI_Comm comm), - (vtxdist, xadj, vwgt, adjncy, adjwgt, part, options, comm) -) - -FRENAME(PARDAMETIS, - PARDAMETIS_, - pardametis, - pardametis_, - pardametis__, - (idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - idxtype *part, int *options, MPI_Comm comm), - (vtxdist, xadj, vwgt, adjncy, adjwgt, part, options, comm) + 
ubvec, options, edgecut, part, &comm) ) diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib.c new file mode 100644 index 00000000..bf4263ee --- /dev/null +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib.c @@ -0,0 +1,120 @@ +/*! +\file gklib.c +\brief Various helper routines generated using GKlib's templates + +\date Started 4/12/2007 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: gklib.c 10395 2011-06-23 23:28:06Z karypis $ \endverbatim +*/ + + +#include "parmetislib.h" + + +/*************************************************************************/ +/*! BLAS routines */ +/*************************************************************************/ +GK_MKBLAS(i, idx_t, idx_t) +GK_MKBLAS(r, real_t, real_t) + +/*************************************************************************/ +/*! Memory allocation routines */ +/*************************************************************************/ +GK_MKALLOC(i, idx_t) +GK_MKALLOC(r, real_t) +GK_MKALLOC(ikv, ikv_t) +GK_MKALLOC(rkv, rkv_t) + +/*************************************************************************/ +/*! Priority queues routines */ +/*************************************************************************/ +#define key_gt(a, b) ((a) > (b)) +GK_MKPQUEUE(ipq, ipq_t, ikv_t, idx_t, idx_t, ikvmalloc, IDX_MAX, key_gt) +GK_MKPQUEUE(rpq, rpq_t, rkv_t, real_t, idx_t, rkvmalloc, REAL_MAX, key_gt) +#undef key_gt + +/*************************************************************************/ +/*! Random number generation routines */ +/*************************************************************************/ +GK_MKRANDOM(i, idx_t, idx_t) + +/*************************************************************************/ +/*! 
Utility routines */ +/*************************************************************************/ +GK_MKARRAY2CSR(i, idx_t) + +/*************************************************************************/ +/*! Sorting routines */ +/*************************************************************************/ +void isorti(size_t n, idx_t *base) +{ +#define i_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(idx_t, base, n, i_lt); +#undef i_lt +} + +void isortd(size_t n, idx_t *base) +{ +#define i_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(idx_t, base, n, i_gt); +#undef i_gt +} + +void rsorti(size_t n, real_t *base) +{ +#define r_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(real_t, base, n, r_lt); +#undef r_lt +} + +void rsortd(size_t n, real_t *base) +{ +#define r_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(real_t, base, n, r_gt); +#undef r_gt +} + +void ikvsorti(size_t n, ikv_t *base) +{ +#define ikey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(ikv_t, base, n, ikey_lt); +#undef ikey_lt +} + +/* Sorts based both on key and val */ +void ikvsortii(size_t n, ikv_t *base) +{ +#define ikeyval_lt(a, b) ((a)->key < (b)->key || ((a)->key == (b)->key && (a)->val < (b)->val)) + GK_MKQSORT(ikv_t, base, n, ikeyval_lt); +#undef ikeyval_lt +} + +void ikvsortd(size_t n, ikv_t *base) +{ +#define ikey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(ikv_t, base, n, ikey_gt); +#undef ikey_gt +} + +void rkvsorti(size_t n, rkv_t *base) +{ +#define rkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(rkv_t, base, n, rkey_lt); +#undef rkey_lt +} + +void rkvsortd(size_t n, rkv_t *base) +{ +#define rkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(rkv_t, base, n, rkey_gt); +#undef rkey_gt +} + +void uvwsorti(size_t n, uvw_t *base) +{ +#define uvwkey_lt(a, b) ((a)->u < (b)->u || ((a)->u == (b)->u && (a)->v < (b)->v)) + GK_MKQSORT(uvw_t, base, n, uvwkey_lt); +#undef uvwkey_lt +} + diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib_defs.h 
b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib_defs.h new file mode 100644 index 00000000..a341f0bf --- /dev/null +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib_defs.h @@ -0,0 +1,53 @@ +/*! +\file +\brief Data structures and prototypes for GKlib integration + +\date Started 12/23/2008 +\author George +\version\verbatim $Id: gklib_defs.h 10395 2011-06-23 23:28:06Z karypis $ \endverbatim +*/ + +#ifndef _LIBPARMETIS_GKLIB_H_ +#define _LIBPARMETIS_GKLIB_H_ + +#include "gklib_rename.h" + +/*************************************************************************/ +/*! Stores a weighted edge */ +/*************************************************************************/ +typedef struct { + idx_t u, v, w; /*!< Edge (u,v) with weight w */ +} uvw_t; + +/************************************************************************* +* Define various data structure using GKlib's templates. +**************************************************************************/ +GK_MKKEYVALUE_T(ikv_t, idx_t, idx_t) +GK_MKKEYVALUE_T(rkv_t, real_t, idx_t) +GK_MKPQUEUE_T(ipq_t, ikv_t) +GK_MKPQUEUE_T(rpq_t, rkv_t) + + +/* gklib.c */ +GK_MKBLAS_PROTO(i, idx_t, idx_t) +GK_MKBLAS_PROTO(r, real_t, real_t) +GK_MKALLOC_PROTO(i, idx_t) +GK_MKALLOC_PROTO(r, real_t) +GK_MKALLOC_PROTO(ikv, ikv_t) +GK_MKALLOC_PROTO(rkv, rkv_t) +GK_MKPQUEUE_PROTO(ipq, ipq_t, idx_t, idx_t) +GK_MKPQUEUE_PROTO(rpq, rpq_t, real_t, idx_t) +GK_MKRANDOM_PROTO(i, idx_t, idx_t) +GK_MKARRAY2CSR_PROTO(i, idx_t) +void isorti(size_t n, idx_t *base); +void isortd(size_t n, idx_t *base); +void rsorti(size_t n, real_t *base); +void rsortd(size_t n, real_t *base); +void ikvsorti(size_t n, ikv_t *base); +void ikvsortii(size_t n, ikv_t *base); +void ikvsortd(size_t n, ikv_t *base); +void rkvsorti(size_t n, rkv_t *base); +void rkvsortd(size_t n, rkv_t *base); +void uvwsorti(size_t n, uvw_t *base); + +#endif diff --git 
a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib_rename.h b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib_rename.h new file mode 100644 index 00000000..a3dc4fcc --- /dev/null +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gklib_rename.h @@ -0,0 +1,122 @@ +/*! +\file + + * Copyright 1997, Regents of the University of Minnesota + * + * This file contains header files + * + * Started 10/2/97 + * George + * + * $Id: gklib_rename.h 10395 2011-06-23 23:28:06Z karypis $ + * + */ + + +#ifndef _LIBPARMETIS_GKLIB_RENAME_H_ +#define _LIBPARMETIS_GKLIB_RENAME_H_ + +/* gklib.c - generated from the .o files using the ./utils/listundescapedsumbols.csh */ +#define iAllocMatrix libparmetis__iAllocMatrix +#define iFreeMatrix libparmetis__iFreeMatrix +#define iSetMatrix libparmetis__iSetMatrix +#define iargmax libparmetis__iargmax +#define iargmax_n libparmetis__iargmax_n +#define iargmin libparmetis__iargmin +#define iarray2csr libparmetis__iarray2csr +#define iaxpy libparmetis__iaxpy +#define icopy libparmetis__icopy +#define idot libparmetis__idot +#define iincset libparmetis__iincset +#define ikvAllocMatrix libparmetis__ikvAllocMatrix +#define ikvFreeMatrix libparmetis__ikvFreeMatrix +#define ikvSetMatrix libparmetis__ikvSetMatrix +#define ikvcopy libparmetis__ikvcopy +#define ikvmalloc libparmetis__ikvmalloc +#define ikvrealloc libparmetis__ikvrealloc +#define ikvset libparmetis__ikvset +#define ikvsmalloc libparmetis__ikvsmalloc +#define ikvsortd libparmetis__ikvsortd +#define ikvsorti libparmetis__ikvsorti +#define ikvsortii libparmetis__ikvsortii +#define imalloc libparmetis__imalloc +#define imax libparmetis__imax +#define imin libparmetis__imin +#define inorm2 libparmetis__inorm2 +#define ipqCheckHeap libparmetis__ipqCheckHeap +#define ipqCreate libparmetis__ipqCreate +#define ipqDelete libparmetis__ipqDelete +#define ipqDestroy libparmetis__ipqDestroy +#define ipqFree 
libparmetis__ipqFree +#define ipqGetTop libparmetis__ipqGetTop +#define ipqInit libparmetis__ipqInit +#define ipqInsert libparmetis__ipqInsert +#define ipqLength libparmetis__ipqLength +#define ipqReset libparmetis__ipqReset +#define ipqSeeKey libparmetis__ipqSeeKey +#define ipqSeeTopKey libparmetis__ipqSeeTopKey +#define ipqSeeTopVal libparmetis__ipqSeeTopVal +#define ipqUpdate libparmetis__ipqUpdate +#define isrand libparmetis__isrand +#define irand libparmetis__irand +#define irandArrayPermute libparmetis__irandArrayPermute +#define irandArrayPermuteFine libparmetis__irandArrayPermuteFine +#define irandInRange libparmetis__irandInRange +#define irealloc libparmetis__irealloc +#define iscale libparmetis__iscale +#define iset libparmetis__iset +#define ismalloc libparmetis__ismalloc +#define isortd libparmetis__isortd +#define isorti libparmetis__isorti +#define isrand libparmetis__isrand +#define isum libparmetis__isum +#define rAllocMatrix libparmetis__rAllocMatrix +#define rFreeMatrix libparmetis__rFreeMatrix +#define rSetMatrix libparmetis__rSetMatrix +#define rargmax libparmetis__rargmax +#define rargmax_n libparmetis__rargmax_n +#define rargmin libparmetis__rargmin +#define raxpy libparmetis__raxpy +#define rcopy libparmetis__rcopy +#define rdot libparmetis__rdot +#define rincset libparmetis__rincset +#define rkvAllocMatrix libparmetis__rkvAllocMatrix +#define rkvFreeMatrix libparmetis__rkvFreeMatrix +#define rkvSetMatrix libparmetis__rkvSetMatrix +#define rkvcopy libparmetis__rkvcopy +#define rkvmalloc libparmetis__rkvmalloc +#define rkvrealloc libparmetis__rkvrealloc +#define rkvset libparmetis__rkvset +#define rkvsmalloc libparmetis__rkvsmalloc +#define rkvsortd libparmetis__rkvsortd +#define rkvsorti libparmetis__rkvsorti +#define rmalloc libparmetis__rmalloc +#define rmax libparmetis__rmax +#define rmin libparmetis__rmin +#define rnorm2 libparmetis__rnorm2 +#define rpqCheckHeap libparmetis__rpqCheckHeap +#define rpqCreate libparmetis__rpqCreate +#define 
rpqDelete libparmetis__rpqDelete +#define rpqDestroy libparmetis__rpqDestroy +#define rpqFree libparmetis__rpqFree +#define rpqGetTop libparmetis__rpqGetTop +#define rpqInit libparmetis__rpqInit +#define rpqInsert libparmetis__rpqInsert +#define rpqLength libparmetis__rpqLength +#define rpqReset libparmetis__rpqReset +#define rpqSeeKey libparmetis__rpqSeeKey +#define rpqSeeTopKey libparmetis__rpqSeeTopKey +#define rpqSeeTopVal libparmetis__rpqSeeTopVal +#define rpqUpdate libparmetis__rpqUpdate +#define rrealloc libparmetis__rrealloc +#define rscale libparmetis__rscale +#define rset libparmetis__rset +#define rsmalloc libparmetis__rsmalloc +#define rsortd libparmetis__rsortd +#define rsorti libparmetis__rsorti +#define rsum libparmetis__rsum +#define uvwsorti libparmetis__uvwsorti + +#endif + + diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gkmetis.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gkmetis.c index 241e84f3..e8327f8c 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gkmetis.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gkmetis.c @@ -9,7 +9,7 @@ * Started 10/19/96 * George * - * $Id: gkmetis.c,v 1.8 2003/07/31 16:23:30 karypis Exp $ + * $Id: gkmetis.c 10663 2011-08-04 03:54:49Z karypis $ * */ @@ -17,259 +17,156 @@ - /*********************************************************************************** * This function is the entry point of the parallel kmetis algorithm that uses * coordinates to compute an initial graph distribution. 
************************************************************************************/ -void ParMETIS_V3_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, - idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, - float *xyz, int *ncon, int *nparts, float *tpwgts, float *ubvec, - int *options, int *edgecut, idxtype *part, MPI_Comm *comm) +int ParMETIS_V3_PartGeomKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ndims, + real_t *xyz, idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, + idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm) { - int h, i, j; - int nvtxs = -1, npes, mype; - int uwgtflag, cut, gcut, maxnvtxs; - int ltvwgts[MAXNCON]; - int moptions[10]; - CtrlType ctrl; - idxtype *uvwgt; - WorkSpaceType wspace; - GraphType *graph, *mgraph; - float avg, maximb, balance, *mytpwgts; - int seed, dbglvl = 0; - int iwgtflag, inumflag, incon, inparts, ioptions[10]; - float *itpwgts, iubvec[MAXNCON]; - - MPI_Comm_size(*comm, &npes); - MPI_Comm_rank(*comm, &mype); - - /* If too many processors switch to non-geometric partitioning. - This is to take care the current poor implementation of sorting - that has an npes*npes memory complexity. The following fix assumes - that the machine can allocate about 128MB of memory per node just - for sorting alone. - Also, if each processor does not have npes vertices, switch to the - non-geometric version of the code. 
- */ - ctrl.comm = *comm; - if (npes > 4096 || GlobalSEMin(&ctrl, vtxdist[mype+1]-vtxdist[mype]) < npes) { - return ParMETIS_V3_PartKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, - numflag, ncon, nparts, tpwgts, ubvec, options, edgecut, part, comm); + idx_t h, i, j, npes, mype, status, nvtxs, seed, dbglvl; + idx_t cut, gcut, maxnvtxs; + idx_t moptions[METIS_NOPTIONS]; + ctrl_t *ctrl; + graph_t *graph, *mgraph; + real_t balance; + size_t curmem; + + /* Check the input parameters and return if an error */ + status = CheckInputsPartGeomKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, + numflag, ndims, xyz, ncon, nparts, tpwgts, ubvec, options, + edgecut, part, comm); + if (GlobalSEMinComm(*comm, status) == 0) + return METIS_ERROR; + + status = METIS_OK; + gk_malloc_init(); + curmem = gk_GetCurMemoryUsed(); + + /* Setup the ctrl */ + ctrl = SetupCtrl(PARMETIS_OP_GKMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm); + npes = ctrl->npes; + mype = ctrl->mype; + + /* Take care of the nparts == 1 case */ + if (*nparts == 1) { + iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 
0 : 1), part); + *edgecut = 0; + goto DONE; } - /********************************/ - /* Try and take care bad inputs */ - /********************************/ - if (options != NULL && options[0] == 1) - dbglvl = options[PMV3_OPTION_DBGLVL]; + /* Take care of npes == 1 case */ + if (npes == 1) { + nvtxs = vtxdist[1] - vtxdist[0]; /* subtraction is required when numflag==1 */ - CheckInputs(STATIC_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag, - ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, - NULL, NULL, options, ioptions, part, comm); + METIS_SetDefaultOptions(moptions); + moptions[METIS_OPTION_NUMBERING] = *numflag; + status = METIS_PartGraphKway(&nvtxs, ncon, xadj, adjncy, vwgt, NULL, adjwgt, + nparts, tpwgts, ubvec, moptions, edgecut, part); - /*********************************/ - /* Take care the nparts = 1 case */ - /*********************************/ - if (inparts <= 1) { - idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); - *edgecut = 0; - return; + goto DONE; } - /******************************/ - /* Take care of npes = 1 case */ - /******************************/ - if (npes == 1 && inparts > 1) { - moptions[0] = 0; - nvtxs = vtxdist[1]; - if (incon == 1) { - METIS_WPartGraphKway(&nvtxs, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, - &inparts, itpwgts, moptions, edgecut, part); - } - else { - /* ADD: this is because METIS does not support tpwgts for all constraints */ - mytpwgts = fmalloc(inparts, "mytpwgts"); - for (i=0; i 0) + ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); - moptions[7] = -1; - METIS_mCPartGraphRecursive2(&nvtxs, &incon, xadj, adjncy, vwgt, adjwgt, &iwgtflag, - &inumflag, &inparts, mytpwgts, moptions, edgecut, part); + graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag); + gk_free((void **)&graph->nvwgt, LTERM); - GKfree((void **)&mytpwgts, LTERM); - } - return; - } + /* Allocate the workspace */ + AllocateWSpace(ctrl, 10*graph->nvtxs); - if (inumflag == 1) 
- ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); + /* Compute the initial npes-way geometric partitioning */ + STARTTIMER(ctrl, ctrl->TotalTmr); + + Coordinate_Partition(ctrl, graph, *ndims, xyz, 1); + + STOPTIMER(ctrl, ctrl->TotalTmr); - /*****************************/ - /* Set up control structures */ - /*****************************/ - if (ioptions[0] == 1) { - dbglvl = ioptions[PMV3_OPTION_DBGLVL]; - seed = ioptions[PMV3_OPTION_SEED]; - } - else { - dbglvl = GLOBAL_DBGLVL; - seed = GLOBAL_SEED; - } - SetUpCtrl(&ctrl, npes, dbglvl, *comm); - ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*incon*amax(npes, inparts)); - ctrl.seed = (seed == 0) ? mype : seed*mype; - ctrl.sync = GlobalSEMax(&ctrl, seed); - ctrl.partType = STATIC_PARTITION; - ctrl.ps_relation = -1; - ctrl.tpwgts = itpwgts; - scopy(incon, iubvec, ctrl.ubvec); - - uwgtflag = iwgtflag|2; - uvwgt = idxsmalloc(vtxdist[mype+1]-vtxdist[mype], 1, "uvwgt"); - graph = Mc_SetUpGraph(&ctrl, 1, vtxdist, xadj, uvwgt, adjncy, adjwgt, &uwgtflag); - GKfree((void **)&graph->nvwgt, &uvwgt, LTERM); - - AllocateWSpace(&ctrl, graph, &wspace); - - /*================================================================= - * Compute the initial npes-way partitioning geometric partitioning - =================================================================*/ - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - Coordinate_Partition(&ctrl, graph, *ndims, xyz, 1, &wspace); - - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); - - /*================================================================= - * Move the graph according to the partitioning - =================================================================*/ - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - 
IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); - - graph->vwgt = ((iwgtflag&2) != 0) ? vwgt : idxsmalloc(graph->nvtxs*incon, 1, "vwgt"); - graph->ncon = incon; - j = ctrl.nparts; - ctrl.nparts = ctrl.npes; - mgraph = Mc_MoveGraph(&ctrl, graph, &wspace); - ctrl.nparts = j; - - /**********************************************************/ - /* Do the same functionality as Mc_SetUpGraph for mgraph */ - /**********************************************************/ - /* compute tvwgts */ - for (j=0; jnvtxs; i++) - for (j=0; jvwgt[i*incon+j]; - - for (j=0; jnvwgt = fmalloc(mgraph->nvtxs*incon, "mgraph->nvwgt"); - for (i=0; invtxs; i++) - for (j=0; jnvwgt[i*incon+j] = (float)(mgraph->vwgt[i*incon+j]) / (float)(ctrl.tvwgts[j]); + /* Move the graph according to the partitioning */ + STARTTIMER(ctrl, ctrl->MoveTmr); + ctrl->nparts = npes; + mgraph = MoveGraph(ctrl, graph); + ctrl->nparts = *nparts; - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); + SetupGraph_nvwgts(ctrl, mgraph); /* compute nvwgts for the moved graph */ - if (ctrl.dbglvl&DBG_INFO) { - cut = 0; - for (i=0; invtxs; i++) - for (j=graph->xadj[i]; jxadj[i+1]; j++) + if (ctrl->dbglvl&DBG_INFO) { + CommInterfaceData(ctrl, graph, graph->where, graph->where+graph->nvtxs); + for (cut=0, i=0; invtxs; i++) { + for (j=graph->xadj[i]; jxadj[i+1]; j++) { if (graph->where[i] != graph->where[graph->adjncy[j]]) cut += graph->adjwgt[j]; - gcut = GlobalSESum(&ctrl, cut)/2; - maxnvtxs = GlobalSEMax(&ctrl, mgraph->nvtxs); - balance = (float)(maxnvtxs)/((float)(graph->gnvtxs)/(float)(npes)); - rprintf(&ctrl, "XYZ Cut: %6d \tBalance: %6.3f [%d %d %d]\n", - gcut, balance, maxnvtxs, graph->gnvtxs, npes); + } + } + gcut = GlobalSESum(ctrl, cut)/2; + maxnvtxs = GlobalSEMax(ctrl, mgraph->nvtxs); + balance = (real_t)(maxnvtxs)/((real_t)(graph->gnvtxs)/(real_t)(npes)); + rprintf(ctrl, "XYZ Cut: %6"PRIDX" \tBalance: %6.3"PRREAL" [%"PRIDX" %"PRIDX" %"PRIDX"]\n", + 
gcut, balance, maxnvtxs, graph->gnvtxs, npes); } - /*================================================================= - * Set up the newly moved graph - =================================================================*/ - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - ctrl.nparts = inparts; - AdjustWSpace(&ctrl, mgraph, &wspace); - - /*======================================================= - * Now compute the partition of the moved graph - =======================================================*/ - if (vtxdist[npes] < SMALLGRAPH || - vtxdist[npes] < npes*20 || - GlobalSESum(&ctrl, mgraph->nedges) == 0) { - IFSET(ctrl.dbglvl, DBG_INFO, rprintf(&ctrl, "Partitioning a graph of size %d serially\n", vtxdist[npes])); - PartitionSmallGraph(&ctrl, mgraph, &wspace); + STOPTIMER(ctrl, ctrl->MoveTmr); + + + /* Compute the partition of the moved graph */ + STARTTIMER(ctrl, ctrl->TotalTmr); + + ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 25*(*ncon)*gk_max(npes, *nparts)); + + if (vtxdist[npes] < SMALLGRAPH + || vtxdist[npes] < npes*20 + || GlobalSESum(ctrl, mgraph->nedges) == 0) { /* serially */ + IFSET(ctrl->dbglvl, DBG_INFO, + rprintf(ctrl, "Partitioning a graph of size %"PRIDX" serially\n", vtxdist[npes])); + PartitionSmallGraph(ctrl, mgraph); } - else { - Mc_Global_Partition(&ctrl, mgraph, &wspace); + else { /* in parallel */ + Global_Partition(ctrl, mgraph); } - ParallelReMapGraph(&ctrl, mgraph, &wspace); + + ParallelReMapGraph(ctrl, mgraph); /* Invert the ordering back to the original graph */ - ctrl.nparts = npes; - ProjectInfoBack(&ctrl, graph, part, mgraph->where, &wspace); + ctrl->nparts = npes; + ProjectInfoBack(ctrl, graph, part, mgraph->where); + ctrl->nparts = *nparts; *edgecut = mgraph->mincut; - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - - /*******************/ - /* Print out stats */ - /*******************/ - 
IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - - if (ctrl.dbglvl&DBG_INFO) { - rprintf(&ctrl, "Final %d-way CUT: %6d \tBalance: ", inparts, mgraph->mincut); - avg = 0.0; - for (h=0; hgnpwgts[i*incon+h]/itpwgts[i*incon+h]); - avg += maximb; - rprintf(&ctrl, "%.3f ", maximb); - } - rprintf(&ctrl, " avg: %.3f\n", avg/(float)incon); - } + STOPTIMER(ctrl, ctrl->TotalTmr); + + + /* Print some stats */ + IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); + IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, mgraph, 0)); - GKfree((void **)&itpwgts, LTERM); FreeGraph(mgraph); - FreeInitialGraphAndRemap(graph, iwgtflag, 1); - FreeWSpace(&wspace); - FreeCtrl(&ctrl); + FreeInitialGraphAndRemap(graph); - if (inumflag == 1) + if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); +DONE: + FreeCtrl(&ctrl); + if (gk_GetCurMemoryUsed() - curmem > 0) { + printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", + (ssize_t)(gk_GetCurMemoryUsed() - curmem)); + } + gk_malloc_cleanup(0); + + return (int)status; } @@ -279,80 +176,80 @@ void ParMETIS_V3_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, * This function assumes that the graph is already nice partitioned among the * processors and then proceeds to perform recursive bisection. 
************************************************************************************/ -void ParMETIS_V3_PartGeom(idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm) +int ParMETIS_V3_PartGeom(idx_t *vtxdist, idx_t *ndims, real_t *xyz, idx_t *part, + MPI_Comm *comm) { - int i, npes, mype, nvtxs, firstvtx, dbglvl; - idxtype *xadj, *adjncy; - CtrlType ctrl; - WorkSpaceType wspace; - GraphType *graph; - int zeroflg = 0; + idx_t i, nvtxs, firstvtx, npes, mype, status; + idx_t *xadj, *adjncy; + ctrl_t *ctrl=NULL; + graph_t *graph=NULL; + size_t curmem; - MPI_Comm_size(*comm, &npes); - MPI_Comm_rank(*comm, &mype); - if (npes == 1) { - idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); - return; - } + /* Check the input parameters and return if an error */ + status = CheckInputsPartGeom(vtxdist, ndims, xyz, part, comm); + if (GlobalSEMinComm(*comm, status) == 0) + return METIS_ERROR; + + status = METIS_OK; + gk_malloc_init(); + curmem = gk_GetCurMemoryUsed(); + + /* Setup the ctrl */ + ctrl = SetupCtrl(PARMETIS_OP_GMETIS, NULL, 1, 1, NULL, NULL, *comm); + /*ctrl->dbglvl=15;*/ + npes = ctrl->npes; + mype = ctrl->mype; - /* Return without computing a partitioning under the following cases: - - The number of processors is greater than 4096 (due to npes^2 - memory complexity of the sorting algorithm implemented). - - When each processor does not have at least npes elements. - These retrictions will be fixed in 4.0. 
- */ - ctrl.comm = *comm; - if (npes > 4096 || GlobalSEMin(&ctrl, vtxdist[mype+1]-vtxdist[mype]) < npes) { - if (mype == 1) - printf("ParMETIS_V3_PartGeom can only be used for less than 4096 processors " - "and when each processor has at least npes elements.\n"); - return; + + /* Trivial case when npes == 1 */ + if (npes == 1) { + iset(vtxdist[mype+1]-vtxdist[mype], 0, part); + goto DONE; } /* Setup a fake graph to allow the rest of the code to work unchanged */ - dbglvl = 0; - - nvtxs = vtxdist[mype+1]-vtxdist[mype]; + nvtxs = vtxdist[mype+1]-vtxdist[mype]; firstvtx = vtxdist[mype]; - xadj = idxmalloc(nvtxs+1, "ParMETIS_PartGeom: xadj"); - adjncy = idxmalloc(nvtxs, "ParMETIS_PartGeom: adjncy"); + xadj = imalloc(nvtxs+1, "ParMETIS_PartGeom: xadj"); + adjncy = imalloc(nvtxs, "ParMETIS_PartGeom: adjncy"); for (i=0; invtxs); - graph = Mc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &zeroflg); - AllocateWSpace(&ctrl, graph, &wspace); + /* Compute the initial geometric partitioning */ + STARTTIMER(ctrl, ctrl->TotalTmr); - /*======================================================= - * Compute the initial geometric partitioning - =======================================================*/ - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); + Coordinate_Partition(ctrl, graph, *ndims, xyz, 0); + icopy(graph->nvtxs, graph->where, part); - Coordinate_Partition(&ctrl, graph, *ndims, xyz, 0, &wspace); + STOPTIMER(ctrl, ctrl->TotalTmr); + IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); - idxcopy(graph->nvtxs, graph->where, part); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); + gk_free((void **)&xadj, (void **)&adjncy, LTERM); + FreeInitialGraphAndRemap(graph); - FreeInitialGraphAndRemap(graph, 0, 1); - FreeWSpace(&wspace); + 
+DONE: FreeCtrl(&ctrl); + if (gk_GetCurMemoryUsed() - curmem > 0) { + printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", + (ssize_t)(gk_GetCurMemoryUsed() - curmem)); + } + gk_malloc_cleanup(0); - GKfree((void **)&xadj, (void **)&adjncy, LTERM); + return (int)status; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gkmpi.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gkmpi.c new file mode 100644 index 00000000..7147cc56 --- /dev/null +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/gkmpi.c @@ -0,0 +1,404 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * gkmpi.c + * + * This function contains wrappers around MPI calls to allow + * future de-coupling of sizes from 'int' datatypes. + * + * Started 5/30/11 + * George + * + * $Id: gkmpi.c 10022 2011-05-30 20:18:42Z karypis $ + */ + +#include + +int gkMPI_Comm_size(MPI_Comm comm, idx_t *size) +{ + int status, lsize; + + status = MPI_Comm_size(comm, &lsize); + *size = lsize; + + return status; +} + +int gkMPI_Comm_rank(MPI_Comm comm, idx_t *rank) +{ + int status, lrank; + + status = MPI_Comm_rank(comm, &lrank); + *rank = lrank; + + return status; +} + +int gkMPI_Get_count(MPI_Status *status, MPI_Datatype datatype, + idx_t *count) +{ + int rstatus; + +#if MPI_VERSION < 4 + int lcount; + rstatus = MPI_Get_count(status, datatype, &lcount); +#else + MPI_Count lcount; + rstatus = MPI_Get_count_c(status, datatype, &lcount); +#endif + + *count = lcount; + + return rstatus; +} + +int gkMPI_Send(void *buf, idx_t count, MPI_Datatype datatype, idx_t dest, + idx_t tag, MPI_Comm comm) +{ +#if MPI_VERSION < 4 + return MPI_Send(buf, count, datatype, dest, tag, comm); +#else + return MPI_Send_c(buf, count, datatype, dest, tag, comm); +#endif +} + +int gkMPI_Recv(void *buf, idx_t count, MPI_Datatype datatype, + idx_t source, idx_t tag, MPI_Comm comm, MPI_Status *status) +{ +#if MPI_VERSION < 4 + 
return MPI_Recv(buf, count, datatype, source, tag, comm, status); +#else + return MPI_Recv_c(buf, count, datatype, source, tag, comm, status); +#endif +} + +int gkMPI_Isend(void *buf, idx_t count, MPI_Datatype datatype, idx_t dest, + idx_t tag, MPI_Comm comm, MPI_Request *request) +{ +#if MPI_VERSION < 4 + return MPI_Isend(buf, count, datatype, dest, tag, comm, request); +#else + return MPI_Isend_c(buf, count, datatype, dest, tag, comm, request); +#endif +} + +int gkMPI_Irecv(void *buf, idx_t count, MPI_Datatype datatype, + idx_t source, idx_t tag, MPI_Comm comm, MPI_Request *request) +{ +#if MPI_VERSION < 4 + return MPI_Irecv(buf, count, datatype, source, tag, comm, request); +#else + return MPI_Irecv_c(buf, count, datatype, source, tag, comm, request); +#endif +} + +int gkMPI_Wait(MPI_Request *request, MPI_Status *status) +{ + return MPI_Wait(request, status); +} + +int gkMPI_Waitall(idx_t count, MPI_Request *array_of_requests, + MPI_Status *array_of_statuses) +{ + return MPI_Waitall(count, array_of_requests, array_of_statuses); +} + +int gkMPI_Barrier(MPI_Comm comm) +{ + return MPI_Barrier(comm); +} + +int gkMPI_Bcast(void *buffer, idx_t count, MPI_Datatype datatype, + idx_t root, MPI_Comm comm) +{ +#if MPI_VERSION < 4 + return MPI_Bcast(buffer, count, datatype, root, comm); +#else + return MPI_Bcast_c(buffer, count, datatype, root, comm); +#endif +} + +int gkMPI_Reduce(void *sendbuf, void *recvbuf, idx_t count, + MPI_Datatype datatype, MPI_Op op, idx_t root, MPI_Comm comm) +{ +#if MPI_VERSION < 4 + return MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm); +#else + return MPI_Reduce_c(sendbuf, recvbuf, count, datatype, op, root, comm); +#endif +} + +int gkMPI_Allreduce(void *sendbuf, void *recvbuf, idx_t count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) +{ +#if MPI_VERSION < 4 + return MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm); +#else + return MPI_Allreduce_c(sendbuf, recvbuf, count, datatype, op, comm); +#endif +} + +int 
gkMPI_Scan(void *sendbuf, void *recvbuf, idx_t count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) +{ +#if MPI_VERSION < 4 + return MPI_Scan(sendbuf, recvbuf, count, datatype, op, comm); +#else + return MPI_Scan_c(sendbuf, recvbuf, count, datatype, op, comm); +#endif +} + +int gkMPI_Allgather(void *sendbuf, idx_t sendcount, + MPI_Datatype sendtype, void *recvbuf, idx_t recvcount, + MPI_Datatype recvtype, MPI_Comm comm) +{ +#if MPI_VERSION < 4 + return MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, + recvcount, recvtype, comm); +#else + return MPI_Allgather_c(sendbuf, sendcount, sendtype, recvbuf, + recvcount, recvtype, comm); +#endif +} + +int gkMPI_Alltoall(void *sendbuf, idx_t sendcount, + MPI_Datatype sendtype, void *recvbuf, idx_t recvcount, + MPI_Datatype recvtype, MPI_Comm comm) +{ +#if MPI_VERSION < 4 + return MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, + recvtype, comm); +#else + return MPI_Alltoall_c(sendbuf, sendcount, sendtype, recvbuf, recvcount, + recvtype, comm); +#endif +} + +int gkMPI_Alltoallv(void *sendbuf, idx_t *sendcounts, + idx_t *sdispls, MPI_Datatype sendtype, void *recvbuf, + idx_t *recvcounts, idx_t *rdispls, MPI_Datatype recvtype, + MPI_Comm comm) +{ +#if MPI_VERSION < 4 +#if IDXTYPEWIDTH == 32 + return MPI_Alltoallv(sendbuf, sendcounts, sdispls, sendtype, + recvbuf, recvcounts, rdispls, recvtype, comm); +#else + idx_t i; + int status, npes, *lsendcounts, *lsdispls, *lrecvcounts, *lrdispls; + + MPI_Comm_size(comm, &npes); + + /* bail-out if MPI 3.x cannot handle such large counts */ + for (i=0; i= INT_MAX || sdispls[i] >= INT_MAX || + recvcounts[i] >= INT_MAX || rdispls[i] >= INT_MAX) + errexit("MPI_Gatherv message sizes goes over INT_MAX. 
Use MPI 4.x\n"); + } + + lsendcounts = gk_imalloc(npes, "lsendcounts"); + lsdispls = gk_imalloc(npes, "lsdispls"); + lrecvcounts = gk_imalloc(npes, "lrecvcounts"); + lrdispls = gk_imalloc(npes, "lrdispls"); + + for (i=0; i= INT_MAX || recvcounts[i] >= INT_MAX || rdispls[i] >= INT_MAX) + errexit("MPI_Allgatherv message sizes goes over INT_MAX. Use MPI 4.x\n"); + } + + lrecvcounts = gk_imalloc(npes, "lrecvcounts"); + lrdispls = gk_imalloc(npes, "lrdispls"); + + for (i=0; i= INT_MAX || recvcount >= INT_MAX || sdispls[i] >= INT_MAX) + errexit("MPI_Scatterv message sizes goes over INT_MAX. Use MPI 4.x\n"); + } + + lsendcounts = gk_imalloc(npes, "lsendcounts"); + lsdispls = gk_imalloc(npes, "lsdispls"); + + for (i=0; i= INT_MAX || recvcounts[i] >= INT_MAX || rdispls[i] >= INT_MAX) + errexit("MPI_Gatherv message sizes goes over INT_MAX. Use MPI 4.x\n"); + } + + + lrecvcounts = gk_imalloc(npes, "lrecvcounts"); + lrdispls = gk_imalloc(npes, "lrdispls"); + + for (i=0; i + + + +/*************************************************************************/ +/*! 
This function creates the graph from the user's inputs +*/ +/*************************************************************************/ +graph_t *SetupGraph(ctrl_t *ctrl, idx_t ncon, idx_t *vtxdist, idx_t *xadj, + idx_t *vwgt, idx_t *vsize, idx_t *adjncy, idx_t *adjwgt, + idx_t wgtflag) +{ + idx_t i, j; + graph_t *graph; + + graph = CreateGraph(); + graph->level = 0; + graph->gnvtxs = vtxdist[ctrl->npes]; + graph->nvtxs = vtxdist[ctrl->mype+1]-vtxdist[ctrl->mype]; + graph->ncon = ncon; + graph->nedges = xadj[graph->nvtxs]; + graph->xadj = xadj; + graph->vwgt = vwgt; + graph->vsize = vsize; + graph->adjncy = adjncy; + graph->adjwgt = adjwgt; + graph->vtxdist = vtxdist; + + graph->free_xadj = 0; + graph->free_adjncy = 0; + + /* allocate memory for weight arrays if not provided */ + if ((wgtflag&2) == 0 || vwgt == NULL) + graph->vwgt = ismalloc(graph->nvtxs*ncon, 1, "SetupGraph: vwgt"); + else + graph->free_vwgt = 0; + + if ((wgtflag&1) == 0 || adjwgt == NULL) + graph->adjwgt = ismalloc(graph->nedges, 1, "SetupGraph: adjwgt"); + else + graph->free_adjwgt = 0; + + + /* allocate memory for special arrays that apply only to some optypes */ + if (ctrl->optype == PARMETIS_OP_AMETIS || ctrl->optype == PARMETIS_OP_RMETIS) { + if (vsize == NULL) + graph->vsize = ismalloc(graph->nvtxs, 1, "vsize"); + else + graph->free_vsize = 0; + + graph->home = ismalloc(graph->nvtxs, 1, "home"); + + /* determine edge_size_ratio */ + ctrl->edge_size_ratio = + (.1+(real_t)GlobalSESum(ctrl, isum(graph->nedges, graph->adjwgt, 1))) / + (.1+(real_t)GlobalSESum(ctrl, isum(graph->nvtxs, graph->vsize, 1))); + } + + + /* compute invtvwgts */ + SetupCtrl_invtvwgts(ctrl, graph); + + + /* compute nvwgts */ + SetupGraph_nvwgts(ctrl, graph); + + return graph; +} + + +/*************************************************************************/ +/*! 
This function creates the nvwgt of the graph +*/ +/*************************************************************************/ +void SetupGraph_nvwgts(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, j, nvtxs, ncon; + idx_t *vwgt; + real_t *nvwgt, *invtvwgts; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + vwgt = graph->vwgt; + + invtvwgts = ctrl->invtvwgts; + + /* compute nvwgts */ + nvwgt = graph->nvwgt = rmalloc(nvtxs*ncon, "SetupGraph_nvwgts: graph->nvwgt"); + for (i=0; ignvtxs = graph->nvtxs = graph->nedges = graph->nsep = -1; + graph->nnbrs = graph->nrecv = graph->nsend = graph->nlocal = -1; + graph->xadj = graph->vwgt = graph->vsize = graph->adjncy = graph->adjwgt = NULL; + graph->nvwgt = NULL; + graph->vtxdist = NULL; + graph->match = graph->cmap = NULL; + graph->label = NULL; + + graph->peind = NULL; + graph->sendptr = graph->sendind = graph->recvptr = graph->recvind = NULL; + graph->imap = NULL; + graph->pexadj = graph->peadjncy = graph->peadjloc = NULL; + graph->lperm = NULL; + + graph->slens = graph->rlens = NULL; + graph->rcand = NULL; + + graph->where = graph->home = graph->lpwgts = graph->gpwgts = NULL; + graph->lnpwgts = graph->gnpwgts = NULL; + graph->ckrinfo = NULL; + + graph->nrinfo = NULL; + graph->sepind = NULL; + + graph->coarser = graph->finer = NULL; + + graph->free_xadj = graph->free_adjncy = graph->free_vwgt = graph->free_adjwgt = graph->free_vsize = 1; +} + + +/*************************************************************************/ +/*! 
This function deallocates any memory stored in a graph +*/ +/*************************************************************************/ +void FreeGraph(graph_t *graph) +{ + + /* Graph structure fields */ + gk_free((void **)&graph->xadj, + (void **)&graph->vwgt, + (void **)&graph->nvwgt, + (void **)&graph->vsize, + (void **)&graph->adjncy, + (void **)&graph->adjwgt, + (void **)&graph->vtxdist, + (void **)&graph->home, + LTERM); + + FreeNonGraphFields(graph); + + gk_free((void **)&graph, LTERM); +} + + +/*************************************************************************/ +/*! This function deallocates the non-graph structure fields of a graph + data structure */ +/*************************************************************************/ +void FreeNonGraphFields(graph_t *graph) +{ + + gk_free( + /* Coarsening fields */ + (void **)&graph->match, + (void **)&graph->cmap, + + /* Initial partitioning fields */ + (void **)&graph->label, + + /* Communication/Setup fields */ + (void **)&graph->peind, + (void **)&graph->sendptr, + (void **)&graph->sendind, + (void **)&graph->recvptr, + (void **)&graph->recvind, + (void **)&graph->imap, + (void **)&graph->pexadj, + (void **)&graph->peadjncy, + (void **)&graph->peadjloc, + (void **)&graph->lperm, + + /* Projection fields */ + (void **)&graph->rlens, + (void **)&graph->slens, + (void **)&graph->rcand, + + /* Refinement fields */ + (void **)&graph->where, + (void **)&graph->lpwgts, + (void **)&graph->gpwgts, + (void **)&graph->lnpwgts, + (void **)&graph->gnpwgts, + (void **)&graph->ckrinfo, + (void **)&graph->nrinfo, + (void **)&graph->sepind, + + LTERM); +} + + +/*************************************************************************/ +/*! 
This function deallocates the non-graph and non-setup structure fields + of a graph data structure */ +/*************************************************************************/ +void FreeNonGraphNonSetupFields(graph_t *graph) +{ + + gk_free( + /* Coarsening fields */ + (void **)&graph->match, + (void **)&graph->cmap, + + /* Initial partitioning fields */ + (void **)&graph->label, + + /* Projection fields */ + (void **)&graph->rlens, + (void **)&graph->slens, + (void **)&graph->rcand, + + /* Refinement fields */ + (void **)&graph->where, + (void **)&graph->lpwgts, + (void **)&graph->gpwgts, + (void **)&graph->lnpwgts, + (void **)&graph->gnpwgts, + (void **)&graph->ckrinfo, + (void **)&graph->nrinfo, + (void **)&graph->sepind, + + LTERM); +} + + +/*************************************************************************/ +/*! This function deallocates the fields created by the CommSetup() */ +/*************************************************************************/ +void FreeCommSetupFields(graph_t *graph) +{ + + gk_free( + /* Communication/Setup fields */ + (void **)&graph->lperm, + (void **)&graph->peind, + (void **)&graph->sendptr, + (void **)&graph->sendind, + (void **)&graph->recvptr, + (void **)&graph->recvind, + (void **)&graph->imap, + (void **)&graph->pexadj, + (void **)&graph->peadjncy, + (void **)&graph->peadjloc, + + LTERM); +} + + +/*************************************************************************/ +/*! 
This function frees any memory allocated for storing the initial graph + and performs the local to global (i.e., original numbering of the + adjacency list) +*/ +/*************************************************************************/ +void FreeInitialGraphAndRemap(graph_t *graph) +{ + idx_t i, nedges; + idx_t *adjncy, *imap; + + nedges = graph->nedges; + adjncy = graph->adjncy; + imap = graph->imap; + + if (imap != NULL) { + for (i=0; invwgt, &graph->home, &graph->lnpwgts, + &graph->gnpwgts, LTERM); + + if (graph->free_vwgt) + gk_free((void **)&graph->vwgt, LTERM); + if (graph->free_adjwgt) + gk_free((void **)&graph->adjwgt, LTERM); + if (graph->free_vsize) + gk_free((void **)&graph->vsize, LTERM); + + gk_free((void **)&graph, LTERM); +} + + +/*************************************************************************/ +/*! This function writes the key contents of the graph on disk and frees + the associated memory */ +/*************************************************************************/ +void graph_WriteToDisk(ctrl_t *ctrl, graph_t *graph) +{ + idx_t nvtxs, ncon, *xadj; + static int gID = 1; + char outfile[1024]; + FILE *fpout; + + if (ctrl->ondisk == 0) + return; + + if (sizeof(idx_t)*(graph->nvtxs*(graph->ncon+1)+2*graph->xadj[graph->nvtxs]) < 128*1024*1024) + return; + + if (graph->gID > 0) { + sprintf(outfile, "parmetis%d.%d.%d", (int)ctrl->mype, (int)ctrl->pid, graph->gID); + gk_rmpath(outfile); + } + + graph->gID = gID++; + sprintf(outfile, "parmetis%d.%d.%d", (int)ctrl->mype, (int)ctrl->pid, graph->gID); + + if ((fpout = fopen(outfile, "wb")) == NULL) + return; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + + if (graph->free_xadj) { + if (fwrite(graph->xadj, sizeof(idx_t), nvtxs+1, fpout) != nvtxs+1) + goto error; + } + if (graph->free_vwgt) { + if (fwrite(graph->vwgt, sizeof(idx_t), nvtxs*ncon, fpout) != nvtxs*ncon) + goto error; + } + if (fwrite(graph->nvwgt, sizeof(real_t), nvtxs*ncon, fpout) != nvtxs*ncon) + goto error; 
+ + if (graph->free_adjncy) { + if (fwrite(graph->adjncy, sizeof(idx_t), xadj[nvtxs], fpout) != xadj[nvtxs]) + goto error; + } + if (graph->free_adjwgt) { + if (fwrite(graph->adjwgt, sizeof(idx_t), xadj[nvtxs], fpout) != xadj[nvtxs]) + goto error; + } + + if (ctrl->optype == PARMETIS_OP_AMETIS || ctrl->optype == PARMETIS_OP_RMETIS) { + if (graph->free_vsize) { + if (fwrite(graph->vsize, sizeof(idx_t), nvtxs, fpout) != nvtxs) + goto error; + } + + if (fwrite(graph->home, sizeof(idx_t), nvtxs, fpout) != nvtxs) + goto error; + } + + fclose(fpout); + + if (graph->free_xadj) + gk_free((void **)&graph->xadj, LTERM); + if (graph->free_vwgt) + gk_free((void **)&graph->vwgt, LTERM); + gk_free((void **)&graph->nvwgt, LTERM); + if (graph->free_vsize) + gk_free((void **)&graph->vsize, LTERM); + if (graph->free_adjncy) + gk_free((void **)&graph->adjncy, LTERM); + if (graph->free_adjwgt) + gk_free((void **)&graph->adjwgt, LTERM); + gk_free((void **)&graph->home, LTERM); + + graph->ondisk = 1; + return; + +error: + printf("Failed on writing %s\n", outfile); + fclose(fpout); + gk_rmpath(outfile); + graph->ondisk = 0; +} + + +/*************************************************************************/ +/*! 
This function reads the key contents of a graph from the disk */ +/*************************************************************************/ +void graph_ReadFromDisk(ctrl_t *ctrl, graph_t *graph) +{ + idx_t nvtxs, ncon, *xadj; + char infile[1024]; + FILE *fpin; + + if (graph->ondisk == 0) + return; /* this graph is not on the disk */ + + sprintf(infile, "parmetis%d.%d.%d", (int)ctrl->mype, (int)ctrl->pid, graph->gID); + + if ((fpin = fopen(infile, "rb")) == NULL) + return; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + + if (graph->free_xadj) { + graph->xadj = imalloc(nvtxs+1, "graph_ReadFromDisk: xadj"); + if (fread(graph->xadj, sizeof(idx_t), nvtxs+1, fpin) != nvtxs+1) + goto error; + } + xadj = graph->xadj; + + if (graph->free_vwgt) { + graph->vwgt = imalloc(nvtxs*ncon, "graph_ReadFromDisk: vwgt"); + if (fread(graph->vwgt, sizeof(idx_t), nvtxs*ncon, fpin) != nvtxs*ncon) + goto error; + } + + graph->nvwgt = rmalloc(nvtxs*ncon, "graph_ReadFromDisk: nvwgt"); + if (fread(graph->nvwgt, sizeof(real_t), nvtxs*ncon, fpin) != nvtxs*ncon) + goto error; + + if (graph->free_adjncy) { + graph->adjncy = imalloc(xadj[nvtxs], "graph_ReadFromDisk: adjncy"); + if (fread(graph->adjncy, sizeof(idx_t), xadj[nvtxs], fpin) != xadj[nvtxs]) + goto error; + } + + if (graph->free_adjwgt) { + graph->adjwgt = imalloc(xadj[nvtxs], "graph_ReadFromDisk: adjwgt"); + if (fread(graph->adjwgt, sizeof(idx_t), xadj[nvtxs], fpin) != xadj[nvtxs]) + goto error; + } + + if (ctrl->optype == PARMETIS_OP_AMETIS || ctrl->optype == PARMETIS_OP_RMETIS) { + if (graph->free_vsize) { + graph->vsize = imalloc(nvtxs, "graph_ReadFromDisk: vsize"); + if (fread(graph->vsize, sizeof(idx_t), nvtxs, fpin) != nvtxs) + goto error; + } + + graph->home = imalloc(nvtxs, "graph_ReadFromDisk: vsize"); + if (fread(graph->home, sizeof(idx_t), nvtxs, fpin) != nvtxs) + goto error; + } + + fclose(fpin); + //printf("ondisk: deleting %s\n", infile); + gk_rmpath(infile); + + graph->gID = 0; + graph->ondisk = 0; + return; + +error: 
+ fclose(fpin); + gk_rmpath(infile); + graph->ondisk = 0; + gk_errexit(SIGERR, "Failed to restore graph %s from the disk.\n", infile); +} + diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/grsetup.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/grsetup.c deleted file mode 100644 index cefcac79..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/grsetup.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mgrsetup.c - * - * This file contain various graph setting up routines - * - * Started 10/19/96 - * George - * - * $Id: grsetup.c,v 1.7 2003/07/23 00:54:55 karypis Exp $ - * - */ - -#include - - - -/*************************************************************************/ -/*! This function creates the graph from the user's inputs */ -/*************************************************************************/ -GraphType *Mc_SetUpGraph(CtrlType *ctrl, int ncon, idxtype *vtxdist, idxtype *xadj, - idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, int *wgtflag) -{ - int i, j; - GraphType *graph; - int ltvwgts[MAXNCON]; - - graph = CreateGraph(); - graph->level = 0; - graph->gnvtxs = vtxdist[ctrl->npes]; - graph->nvtxs = vtxdist[ctrl->mype+1]-vtxdist[ctrl->mype]; - graph->ncon = ncon; - graph->nedges = xadj[graph->nvtxs]; - graph->xadj = xadj; - graph->vwgt = vwgt; - graph->adjncy = adjncy; - graph->adjwgt = adjwgt; - graph->vtxdist = vtxdist; - - - if (((*wgtflag)&2) == 0) - graph->vwgt = idxsmalloc(graph->nvtxs*ncon, 1, "Par_KMetis: vwgt"); - - if (((*wgtflag)&1) == 0) - graph->adjwgt = idxsmalloc(graph->nedges, 1, "Par_KMetis: adjwgt"); - - /* compute tvwgts */ - for (j=0; jnvtxs; i++) - for (j=0; jvwgt[i*ncon+j]; - - for (j=0; jtvwgts[j] = GlobalSESum(ctrl, ltvwgts[j]); - - /* check for zero wgt constraints */ - for (i=0; itvwgts[i] == 0) { - rprintf(ctrl, "ERROR: sum weight for constraint %d is zero\n", i); - 
MPI_Finalize(); - exit(-1); - } - } - - /* compute nvwgts */ - graph->nvwgt = fmalloc(graph->nvtxs*ncon, "graph->nvwgt"); - for (i=0; invtxs; i++) { - for (j=0; jnvwgt[i*ncon+j] = (float)(graph->vwgt[i*ncon+j]) / (float)(ctrl->tvwgts[j]); - } - - srand(ctrl->seed); - - return graph; -} - - -/*************************************************************************/ -/*! This function sets the CtrlType structure */ -/*************************************************************************/ -void SetUpCtrl(CtrlType *ctrl, int nparts, int dbglvl, MPI_Comm comm) -{ - - MPI_Comm_dup(comm, &(ctrl->gcomm)); - MPI_Comm_rank(ctrl->gcomm, &ctrl->mype); - MPI_Comm_size(ctrl->gcomm, &ctrl->npes); - - ctrl->dbglvl = dbglvl; - ctrl->nparts = nparts; /* Set the # of partitions is de-coupled from the # of domains */ - ctrl->comm = ctrl->gcomm; - ctrl->xyztype = XYZ_SPFILL; - - srand(ctrl->mype); -} - - -/*************************************************************************/ -/*! Setups the global communicator and related info */ -/*************************************************************************/ -void SetUpComm(CtrlType *ctrl, MPI_Comm comm) -{ - - MPI_Comm_dup(comm, &(ctrl->gcomm)); - MPI_Comm_rank(ctrl->gcomm, &ctrl->mype); - MPI_Comm_size(ctrl->gcomm, &ctrl->npes); - - ctrl->comm = ctrl->gcomm; -} - - -/************************************************************************* -* This function changes the numbering from 1 to 0 or 0 to 1 -**************************************************************************/ -void ChangeNumbering(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *part, int npes, int mype, int from) -{ - int i, nvtxs; - - nvtxs = vtxdist[mype+1]-vtxdist[mype]; - - if (from == 1) { /* Change it from 1 to 0 */ - for (i=0; invtxs; i++) { - for (j=graph->xadj[i]; jxadj[i+1]; j++) { - k = graph->xadj[i] + RandomInRange(graph->xadj[i+1]-graph->xadj[i]); - SWAP(graph->adjncy[j], graph->adjncy[k], tmp); - SWAP(graph->adjwgt[j], graph->adjwgt[k], 
tmp); - } - } -} - - -/************************************************************************* -* This function computes movement statistics for adaptive refinement -* schemes -**************************************************************************/ -void ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout) -{ - int i, j, nvtxs; - idxtype *vwgt, *where; - idxtype *lpvtxs, *gpvtxs; - - nvtxs = graph->nvtxs; - vwgt = graph->vwgt; - where = graph->where; - - lpvtxs = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: lpvtxs"); - gpvtxs = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: gpvtxs"); - - for (j=i=0; imype) - j++; - } - - /* PrintVector(ctrl, ctrl->npes, 0, lpvtxs, "Lpvtxs: "); */ - - MPI_Allreduce((void *)lpvtxs, (void *)gpvtxs, ctrl->nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); - - *nmoved = GlobalSESum(ctrl, j); - *maxout = GlobalSEMax(ctrl, j); - *maxin = GlobalSEMax(ctrl, gpvtxs[ctrl->mype]-(nvtxs-j)); - - GKfree((void **)&lpvtxs, (void **)&gpvtxs, LTERM); -} diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/iidxsort.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/iidxsort.c deleted file mode 100644 index 869748c3..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/iidxsort.c +++ /dev/null @@ -1,152 +0,0 @@ -#include - - -/* Byte-wise swap two items of size SIZE. */ -#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0) - -/* Discontinue quicksort algorithm when partition gets below this size. - This particular magic number was chosen to work best on a Sun 4/260. */ -#define MAX_THRESH 20 - -/* Stack node declarations used to store unfulfilled partition obligations. */ -typedef struct { - idxtype *lo; - idxtype *hi; -} stack_node; - - -/* The next 4 #defines implement a very fast in-line stack abstraction. 
*/ -#define STACK_SIZE (8 * sizeof(unsigned long int)) -#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) -#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) -#define STACK_NOT_EMPTY (stack < top) - - -void iidxsort(int total_elems, idxtype *pbase) -{ - idxtype pivot, stmp; - - if (total_elems == 0) - /* Avoid lossage with unsigned arithmetic below. */ - return; - - if (total_elems > MAX_THRESH) { - idxtype *lo = pbase; - idxtype *hi = &lo[total_elems - 1]; - stack_node stack[STACK_SIZE]; /* Largest size needed for 32-bit int!!! */ - stack_node *top = stack + 1; - - while (STACK_NOT_EMPTY) { - idxtype *left_ptr; - idxtype *right_ptr; - idxtype *mid = lo + ((hi - lo) >> 1); - - - if (*mid < *lo) - QSSWAP(*mid, *lo, stmp); - if (*hi < *mid) - QSSWAP(*mid, *hi, stmp); - else - goto jump_over; - if (*mid < *lo) - QSSWAP(*mid, *lo, stmp); - -jump_over:; - pivot = *mid; - left_ptr = lo + 1; - right_ptr = hi - 1; - - /* Here's the famous ``collapse the walls'' section of quicksort. - Gotta like those tight inner loops! They are the main reason - that this algorithm runs much faster than others. */ - do { - while (*left_ptr < pivot) - left_ptr++; - - while (pivot < *right_ptr) - right_ptr--; - - if (left_ptr < right_ptr) { - QSSWAP (*left_ptr, *right_ptr, stmp); - left_ptr++; - right_ptr--; - } - else if (left_ptr == right_ptr) { - left_ptr++; - right_ptr--; - break; - } - } while (left_ptr <= right_ptr); - - /* Set up pointers for next iteration. First determine whether - left and right partitions are below the threshold size. If so, - ignore one or both. Otherwise, push the larger partition's - bounds on the stack and continue sorting the smaller one. */ - - if ((size_t) (right_ptr - lo) <= MAX_THRESH) { - if ((size_t) (hi - left_ptr) <= MAX_THRESH) - /* Ignore both small partitions. */ - POP (lo, hi); - else - /* Ignore small left partition. 
*/ - lo = left_ptr; - } - else if ((size_t) (hi - left_ptr) <= MAX_THRESH) - /* Ignore small right partition. */ - hi = right_ptr; - else if ((right_ptr - lo) > (hi - left_ptr)) { - /* Push larger left partition indices. */ - PUSH (lo, right_ptr); - lo = left_ptr; - } - else { - /* Push larger right partition indices. */ - PUSH (left_ptr, hi); - hi = right_ptr; - } - } - } - - /* Once the BASE_PTR array is partially sorted by quicksort the rest - is completely sorted using insertion sort, since this is efficient - for partitions below MAX_THRESH size. BASE_PTR points to the beginning - of the array to sort, and END_PTR points at the very last element in - the array (*not* one beyond it!). */ - - { - idxtype *end_ptr = &pbase[total_elems - 1]; - idxtype *tmp_ptr = pbase; - idxtype *thresh = (end_ptr < pbase + MAX_THRESH ? end_ptr : pbase + MAX_THRESH); - register idxtype *run_ptr; - - /* Find smallest element in first threshold and place it at the - array's beginning. This is the smallest array element, - and the operation speeds up insertion sort's inner loop. */ - - - for (run_ptr = tmp_ptr + 1; run_ptr <= thresh; run_ptr++) - if (*run_ptr < *tmp_ptr) - tmp_ptr = run_ptr; - - if (tmp_ptr != pbase) - QSSWAP(*tmp_ptr, *pbase, stmp); - - /* Insertion sort, running from left-hand-side up to right-hand-side. 
*/ - run_ptr = pbase + 1; - while (++run_ptr <= end_ptr) { - tmp_ptr = run_ptr - 1; - while (*run_ptr < *tmp_ptr) - tmp_ptr--; - - tmp_ptr++; - if (tmp_ptr != run_ptr) { - idxtype elmnt = *run_ptr; - idxtype *mptr; - - for (mptr=run_ptr; mptr>tmp_ptr; mptr--) - *mptr = *(mptr-1); - *mptr = elmnt; - } - } - } -} diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/iintsort.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/iintsort.c deleted file mode 100644 index fb0e8405..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/iintsort.c +++ /dev/null @@ -1,157 +0,0 @@ -#include - - -/* Byte-wise swap two items of size SIZE. */ -#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0) - -/* Discontinue quicksort algorithm when partition gets below this size. - This particular magic number was chosen to work best on a Sun 4/260. */ -#define MAX_THRESH 20 - -/* Stack node declarations used to store unfulfilled partition obligations. */ -typedef struct { - int *lo; - int *hi; -} stack_node; - - -/* The next 4 #defines implement a very fast in-line stack abstraction. */ -#define STACK_SIZE (8 * sizeof(unsigned long int)) -#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) -#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) -#define STACK_NOT_EMPTY (stack < top) - - -void iintsort(int total_elems, int *pbase) -{ - int pivot, stmp; - - if (total_elems == 0) - /* Avoid lossage with unsigned arithmetic below. */ - return; - - if (total_elems > MAX_THRESH) { - int *lo = pbase; - int *hi = &lo[total_elems - 1]; - stack_node stack[STACK_SIZE]; /* Largest size needed for 32-bit int!!! */ - stack_node *top = stack + 1; - - while (STACK_NOT_EMPTY) { - int *left_ptr; - int *right_ptr; - - /* Select median value from among LO, MID, and HI. Rearrange - LO and HI so the three values are sorted. 
This lowers the - probability of picking a pathological pivot value and - skips a comparison for both the LEFT_PTR and RIGHT_PTR. */ - - int *mid = lo + ((hi - lo) >> 1); - - if (*mid < *lo) - QSSWAP(*mid, *lo, stmp); - if (*hi < *mid) - QSSWAP(*mid, *hi, stmp); - else - goto jump_over; - if (*mid < *lo) - QSSWAP(*mid, *lo, stmp); - -jump_over:; - pivot = *mid; - left_ptr = lo + 1; - right_ptr = hi - 1; - - /* Here's the famous ``collapse the walls'' section of quicksort. - Gotta like those tight inner loops! They are the main reason - that this algorithm runs much faster than others. */ - do { - while (*left_ptr < pivot) - left_ptr++; - - while (pivot < *right_ptr) - right_ptr--; - - if (left_ptr < right_ptr) { - QSSWAP (*left_ptr, *right_ptr, stmp); - left_ptr++; - right_ptr--; - } - else if (left_ptr == right_ptr) { - left_ptr++; - right_ptr--; - break; - } - } while (left_ptr <= right_ptr); - - /* Set up pointers for next iteration. First determine whether - left and right partitions are below the threshold size. If so, - ignore one or both. Otherwise, push the larger partition's - bounds on the stack and continue sorting the smaller one. */ - - if ((size_t) (right_ptr - lo) <= MAX_THRESH) { - if ((size_t) (hi - left_ptr) <= MAX_THRESH) - /* Ignore both small partitions. */ - POP (lo, hi); - else - /* Ignore small left partition. */ - lo = left_ptr; - } - else if ((size_t) (hi - left_ptr) <= MAX_THRESH) - /* Ignore small right partition. */ - hi = right_ptr; - else if ((right_ptr - lo) > (hi - left_ptr)) { - /* Push larger left partition indices. */ - PUSH (lo, right_ptr); - lo = left_ptr; - } - else { - /* Push larger right partition indices. */ - PUSH (left_ptr, hi); - hi = right_ptr; - } - } - } - - /* Once the BASE_PTR array is partially sorted by quicksort the rest - is completely sorted using insertion sort, since this is efficient - for partitions below MAX_THRESH size. 
BASE_PTR points to the beginning - of the array to sort, and END_PTR points at the very last element in - the array (*not* one beyond it!). */ - - { - int *end_ptr = &pbase[total_elems - 1]; - int *tmp_ptr = pbase; - int *thresh = (end_ptr < pbase + MAX_THRESH ? end_ptr : pbase + MAX_THRESH); - register int *run_ptr; - - /* Find smallest element in first threshold and place it at the - array's beginning. This is the smallest array element, - and the operation speeds up insertion sort's inner loop. */ - - - for (run_ptr = tmp_ptr + 1; run_ptr <= thresh; run_ptr++) - if (*run_ptr < *tmp_ptr) - tmp_ptr = run_ptr; - - if (tmp_ptr != pbase) - QSSWAP(*tmp_ptr, *pbase, stmp); - - /* Insertion sort, running from left-hand-side up to right-hand-side. */ - run_ptr = pbase + 1; - while (++run_ptr <= end_ptr) { - tmp_ptr = run_ptr - 1; - while (*run_ptr < *tmp_ptr) - tmp_ptr--; - - tmp_ptr++; - if (tmp_ptr != run_ptr) { - int elmnt = *run_ptr; - int *mptr; - - for (mptr=run_ptr; mptr>tmp_ptr; mptr--) - *mptr = *(mptr-1); - *mptr = elmnt; - } - } - } -} diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ikeysort.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ikeysort.c deleted file mode 100644 index 14b42411..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ikeysort.c +++ /dev/null @@ -1,151 +0,0 @@ -#include - - -/* Byte-wise swap two items of size SIZE. */ -#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0) - -/* Discontinue quicksort algorithm when partition gets below this size. - This particular magic number was chosen to work best on a Sun 4/260. */ -#define MAX_THRESH 20 - -/* Stack node declarations used to store unfulfilled partition obligations. */ -typedef struct { - KeyValueType *lo; - KeyValueType *hi; -} stack_node; - - -/* The next 4 #defines implement a very fast in-line stack abstraction. 
*/ -#define STACK_SIZE (8 * sizeof(unsigned long int)) -#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) -#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) -#define STACK_NOT_EMPTY (stack < top) - - -void ikeysort(int total_elems, KeyValueType *pbase) -{ - KeyValueType pivot, stmp; - - if (total_elems == 0) - /* Avoid lossage with unsigned arithmetic below. */ - return; - - if (total_elems > MAX_THRESH) { - KeyValueType *lo = pbase; - KeyValueType *hi = &lo[total_elems - 1]; - stack_node stack[STACK_SIZE]; /* Largest size needed for 32-bit int!!! */ - stack_node *top = stack + 1; - - while (STACK_NOT_EMPTY) { - KeyValueType *left_ptr; - KeyValueType *right_ptr; - KeyValueType *mid = lo + ((hi - lo) >> 1); - - if (mid->key < lo->key) - QSSWAP(*mid, *lo, stmp); - if (hi->key < mid->key) - QSSWAP(*mid, *hi, stmp); - else - goto jump_over; - if (mid->key < lo->key) - QSSWAP(*mid, *lo, stmp); - -jump_over:; - pivot = *mid; - left_ptr = lo + 1; - right_ptr = hi - 1; - - /* Here's the famous ``collapse the walls'' section of quicksort. - Gotta like those tight inner loops! They are the main reason - that this algorithm runs much faster than others. */ - do { - while (left_ptr->key < pivot.key) - left_ptr++; - - while (pivot.key < right_ptr->key) - right_ptr--; - - if (left_ptr < right_ptr) { - QSSWAP (*left_ptr, *right_ptr, stmp); - left_ptr++; - right_ptr--; - } - else if (left_ptr == right_ptr) { - left_ptr++; - right_ptr--; - break; - } - } while (left_ptr <= right_ptr); - - /* Set up pointers for next iteration. First determine whether - left and right partitions are below the threshold size. If so, - ignore one or both. Otherwise, push the larger partition's - bounds on the stack and continue sorting the smaller one. */ - - if ((size_t) (right_ptr - lo) <= MAX_THRESH) { - if ((size_t) (hi - left_ptr) <= MAX_THRESH) - /* Ignore both small partitions. */ - POP (lo, hi); - else - /* Ignore small left partition. 
*/ - lo = left_ptr; - } - else if ((size_t) (hi - left_ptr) <= MAX_THRESH) - /* Ignore small right partition. */ - hi = right_ptr; - else if ((right_ptr - lo) > (hi - left_ptr)) { - /* Push larger left partition indices. */ - PUSH (lo, right_ptr); - lo = left_ptr; - } - else { - /* Push larger right partition indices. */ - PUSH (left_ptr, hi); - hi = right_ptr; - } - } - } - - /* Once the BASE_PTR array is partially sorted by quicksort the rest - is completely sorted using insertion sort, since this is efficient - for partitions below MAX_THRESH size. BASE_PTR points to the beginning - of the array to sort, and END_PTR points at the very last element in - the array (*not* one beyond it!). */ - - { - KeyValueType *end_ptr = &pbase[total_elems - 1]; - KeyValueType *tmp_ptr = pbase; - KeyValueType *thresh = (end_ptr < pbase + MAX_THRESH ? end_ptr : pbase + MAX_THRESH); - register KeyValueType *run_ptr; - - /* Find smallest element in first threshold and place it at the - array's beginning. This is the smallest array element, - and the operation speeds up insertion sort's inner loop. */ - - for (run_ptr = tmp_ptr + 1; run_ptr <= thresh; run_ptr++) - if (run_ptr->key < tmp_ptr->key) - tmp_ptr = run_ptr; - - if (tmp_ptr != pbase) - QSSWAP(*tmp_ptr, *pbase, stmp); - - /* Insertion sort, running from left-hand-side up to right-hand-side. 
*/ - run_ptr = pbase + 1; - while (++run_ptr <= end_ptr) { - tmp_ptr = run_ptr - 1; - while (run_ptr->key < tmp_ptr->key) - tmp_ptr--; - - tmp_ptr++; - if (tmp_ptr != run_ptr) { - KeyValueType elmnt = *run_ptr; - KeyValueType *mptr; - - for (mptr=run_ptr; mptr>tmp_ptr; mptr--) - *mptr = *(mptr-1); - *mptr = elmnt; - } - } - } -} - diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ikeyvalsort.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ikeyvalsort.c deleted file mode 100644 index 59dce528..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ikeyvalsort.c +++ /dev/null @@ -1,151 +0,0 @@ -#include - - -/* Byte-wise swap two items of size SIZE. */ -#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0) - -/* Discontinue quicksort algorithm when partition gets below this size. - This particular magic number was chosen to work best on a Sun 4/260. */ -#define MAX_THRESH 20 - -/* Stack node declarations used to store unfulfilled partition obligations. */ -typedef struct { - KeyValueType *lo; - KeyValueType *hi; -} stack_node; - - -/* The next 4 #defines implement a very fast in-line stack abstraction. */ -#define STACK_SIZE (8 * sizeof(unsigned long int)) -#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) -#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) -#define STACK_NOT_EMPTY (stack < top) - - -void ikeyvalsort(int total_elems, KeyValueType *pbase) -{ - KeyValueType pivot, stmp; - - if (total_elems == 0) - /* Avoid lossage with unsigned arithmetic below. */ - return; - - if (total_elems > MAX_THRESH) { - KeyValueType *lo = pbase; - KeyValueType *hi = &lo[total_elems - 1]; - stack_node stack[STACK_SIZE]; /* Largest size needed for 32-bit int!!! 
*/ - stack_node *top = stack + 1; - - while (STACK_NOT_EMPTY) { - KeyValueType *left_ptr; - KeyValueType *right_ptr; - KeyValueType *mid = lo + ((hi - lo) >> 1); - - if (mid->key < lo->key || (mid->key == lo->key && mid->val < lo->val)) - QSSWAP(*mid, *lo, stmp); - if (hi->key < mid->key || (hi->key == mid->key && hi->val < mid->val)) - QSSWAP(*mid, *hi, stmp); - else - goto jump_over; - if (mid->key < lo->key || (mid->key == lo->key && mid->val < lo->val)) - QSSWAP(*mid, *lo, stmp); - -jump_over:; - pivot = *mid; - left_ptr = lo + 1; - right_ptr = hi - 1; - - /* Here's the famous ``collapse the walls'' section of quicksort. - Gotta like those tight inner loops! They are the main reason - that this algorithm runs much faster than others. */ - do { - while (left_ptr->key < pivot.key || (left_ptr->key == pivot.key && left_ptr->val < pivot.val)) - left_ptr++; - - while (pivot.key < right_ptr->key || (pivot.key == right_ptr->key && pivot.val < right_ptr->val)) - right_ptr--; - - if (left_ptr < right_ptr) { - QSSWAP (*left_ptr, *right_ptr, stmp); - left_ptr++; - right_ptr--; - } - else if (left_ptr == right_ptr) { - left_ptr++; - right_ptr--; - break; - } - } while (left_ptr <= right_ptr); - - /* Set up pointers for next iteration. First determine whether - left and right partitions are below the threshold size. If so, - ignore one or both. Otherwise, push the larger partition's - bounds on the stack and continue sorting the smaller one. */ - - if ((size_t) (right_ptr - lo) <= MAX_THRESH) { - if ((size_t) (hi - left_ptr) <= MAX_THRESH) - /* Ignore both small partitions. */ - POP (lo, hi); - else - /* Ignore small left partition. */ - lo = left_ptr; - } - else if ((size_t) (hi - left_ptr) <= MAX_THRESH) - /* Ignore small right partition. */ - hi = right_ptr; - else if ((right_ptr - lo) > (hi - left_ptr)) { - /* Push larger left partition indices. */ - PUSH (lo, right_ptr); - lo = left_ptr; - } - else { - /* Push larger right partition indices. 
*/ - PUSH (left_ptr, hi); - hi = right_ptr; - } - } - } - - /* Once the BASE_PTR array is partially sorted by quicksort the rest - is completely sorted using insertion sort, since this is efficient - for partitions below MAX_THRESH size. BASE_PTR points to the beginning - of the array to sort, and END_PTR points at the very last element in - the array (*not* one beyond it!). */ - - { - KeyValueType *end_ptr = &pbase[total_elems - 1]; - KeyValueType *tmp_ptr = pbase; - KeyValueType *thresh = (end_ptr < pbase + MAX_THRESH ? end_ptr : pbase + MAX_THRESH); - register KeyValueType *run_ptr; - - /* Find smallest element in first threshold and place it at the - array's beginning. This is the smallest array element, - and the operation speeds up insertion sort's inner loop. */ - - for (run_ptr = tmp_ptr + 1; run_ptr <= thresh; run_ptr++) - if (run_ptr->key < tmp_ptr->key || (run_ptr->key == tmp_ptr->key && run_ptr->val < tmp_ptr->val)) - tmp_ptr = run_ptr; - - if (tmp_ptr != pbase) - QSSWAP(*tmp_ptr, *pbase, stmp); - - /* Insertion sort, running from left-hand-side up to right-hand-side. 
*/ - run_ptr = pbase + 1; - while (++run_ptr <= end_ptr) { - tmp_ptr = run_ptr - 1; - while (run_ptr->key < tmp_ptr->key || (run_ptr->key == tmp_ptr->key && run_ptr->val < tmp_ptr->val)) - tmp_ptr--; - - tmp_ptr++; - if (tmp_ptr != run_ptr) { - KeyValueType elmnt = *run_ptr; - KeyValueType *mptr; - - for (mptr=run_ptr; mptr>tmp_ptr; mptr--) - *mptr = *(mptr-1); - *mptr = elmnt; - } - } - } -} - diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initbalance.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initbalance.c index 8d396761..e00c00ac 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initbalance.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initbalance.c @@ -8,7 +8,7 @@ * Started 3/4/96 * George * - * $Id: initbalance.c,v 1.4 2003/07/30 21:18:52 karypis Exp $ + * $Id: initbalance.c 10592 2011-07-16 21:17:53Z karypis $ */ #include @@ -19,81 +19,72 @@ * This algorithm assembles the graph to all the processors and preceeds * with the balancing step. 
**************************************************************************/ -void Balance_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void Balance_Partition(ctrl_t *ctrl, graph_t *graph) { - int i, j, mype, npes, nvtxs, nedges, ncon; - idxtype *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize; - idxtype *part, *lwhere, *home; - GraphType *agraph, cgraph; - CtrlType myctrl; - int lnparts, fpart, fpe, lnpes, ngroups, srnpes, srmype; - int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut; - int sr_pe, gd_pe, sr, gd, who_wins, *rcounts, *rdispls; - float my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum; - float rating, max_rating, your_cost = -1.0, your_balance = -1.0; - float lbvec[MAXNCON], lbsum, min_lbsum, *mytpwgts, mytpwgts2[2], buffer[2]; + idx_t i, j, nvtxs, nedges, ncon; + idx_t mype, npes, srnpes, srmype; + idx_t *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize; + idx_t *part, *lwhere, *home; + idx_t lnparts, fpart, fpe, lnpes, ngroups; + idx_t *rcounts, *rdispls; + idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut; + idx_t sr_pe, gd_pe, sr, gd, who_wins; + real_t my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum; + real_t rating, max_rating, your_cost = -1.0, your_balance = -1.0; + real_t lbsum, min_lbsum, *lbvec, *tpwgts, *tpwgts2, buffer[2]; + graph_t *agraph, cgraph; + ctrl_t *myctrl; MPI_Status status; MPI_Comm ipcomm, srcomm; struct { - float cost; + double cost; int rank; } lpecost, gpecost; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); + WCOREPUSH; vtxdist = graph->vtxdist; - agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace); - nvtxs = cgraph.nvtxs = agraph->nvtxs; - nedges = cgraph.nedges = agraph->nedges; - ncon = cgraph.ncon = agraph->ncon; - - xadj = cgraph.xadj = idxmalloc(nvtxs*(5+ncon)+1+nedges*2, "U_IP: xadj"); - vwgt = cgraph.vwgt = xadj + nvtxs+1; - vsize = cgraph.vsize = xadj + nvtxs*(1+ncon)+1; - cgraph.where = agraph->where = part = xadj + 
nvtxs*(2+ncon)+1; - lwhere = xadj + nvtxs*(3+ncon)+1; - home = xadj + nvtxs*(4+ncon)+1; - adjncy = cgraph.adjncy = xadj + nvtxs*(5+ncon)+1; - adjwgt = cgraph.adjwgt = xadj + nvtxs*(5+ncon)+1 + nedges; - - /* ADD: this assumes that tpwgts for all constraints is the same */ - /* ADD: this is necessary because serial metis does not support the general case */ - mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts"); - for (i=0; inparts; i++) - for (j=0; jtpwgts[i*ncon+j]; - for (i=0; inparts; i++) - mytpwgts[i] /= (float)ncon; + agraph = AssembleAdaptiveGraph(ctrl, graph); + nvtxs = cgraph.nvtxs = agraph->nvtxs; + nedges = cgraph.nedges = agraph->nedges; + ncon = cgraph.ncon = agraph->ncon; + xadj = cgraph.xadj = icopy(nvtxs+1, agraph->xadj, iwspacemalloc(ctrl, nvtxs+1)); + vwgt = cgraph.vwgt = icopy(nvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, nvtxs*ncon)); + vsize = cgraph.vsize = icopy(nvtxs, agraph->vsize, iwspacemalloc(ctrl, nvtxs)); + adjncy = cgraph.adjncy = icopy(nedges, agraph->adjncy, iwspacemalloc(ctrl, nedges)); + adjwgt = cgraph.adjwgt = icopy(nedges, agraph->adjwgt, iwspacemalloc(ctrl, nedges)); + part = cgraph.where = agraph->where = iwspacemalloc(ctrl, nvtxs); + + lwhere = iwspacemalloc(ctrl, nvtxs); + home = iwspacemalloc(ctrl, nvtxs); + lbvec = rwspacemalloc(ctrl, graph->ncon); - idxcopy(nvtxs+1, agraph->xadj, xadj); - idxcopy(nvtxs*ncon, agraph->vwgt, vwgt); - idxcopy(nvtxs, agraph->vsize, vsize); - idxcopy(nedges, agraph->adjncy, adjncy); - idxcopy(nedges, agraph->adjwgt, adjwgt); /****************************************/ /****************************************/ if (ctrl->ps_relation == PARMETIS_PSR_UNCOUPLED) { - rcounts = imalloc(ctrl->npes, "rcounts"); - rdispls = imalloc(ctrl->npes+1, "rdispls"); + WCOREPUSH; + rcounts = iwspacemalloc(ctrl, ctrl->npes); + rdispls = iwspacemalloc(ctrl, ctrl->npes+1); - for (i=0; inpes; i++) { + for (i=0; inpes; i++) rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i]; - } MAKECSR(i, ctrl->npes, rdispls); - 
MPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_DATATYPE, - (void *)part, rcounts, rdispls, IDX_DATATYPE, ctrl->comm); + gkMPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_T, + (void *)part, rcounts, rdispls, IDX_T, ctrl->comm); for (i=0; invtxs; i++) home[i] = part[i]; - GKfree((void **)&rcounts, (void **)&rdispls, LTERM); + WCOREPOP; /* local frees */ } else { - for (i=0; inpes; i++) + for (i=0; inpes; i++) { for (j=vtxdist[i]; jdbglvl, DBG_REFINEINFO, Mc_ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec)); - IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "input cut: %d, balance: ", ComputeSerialEdgeCut(agraph))); + IFSET(ctrl->dbglvl, DBG_REFINEINFO, + ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec)); + IFSET(ctrl->dbglvl, DBG_REFINEINFO, + rprintf(ctrl, "input cut: %"PRIDX", balance: ", ComputeSerialEdgeCut(agraph))); for (i=0; incon; i++) - IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3f ", lbvec[i])); + IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3"PRREAL" ", lbvec[i])); IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n")); /****************************************/ @@ -126,202 +119,211 @@ void Balance_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) sr_pe = 0; gd_pe = 1; - MPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm); - MPI_Comm_rank(ipcomm, &mype); - MPI_Comm_size(ipcomm, &npes); - - myctrl.dbglvl = 0; - myctrl.mype = mype; - myctrl.npes = npes; - myctrl.comm = ipcomm; - myctrl.sync = ctrl->sync; - myctrl.seed = ctrl->seed; - myctrl.nparts = ctrl->nparts; - myctrl.ipc_factor = ctrl->ipc_factor; - myctrl.redist_factor = ctrl->redist_base; - myctrl.partType = ADAPTIVE_PARTITION; - myctrl.ps_relation = PARMETIS_PSR_UNCOUPLED; - myctrl.tpwgts = ctrl->tpwgts; - icopy(ncon, ctrl->tvwgts, myctrl.tvwgts); - icopy(ncon, ctrl->ubvec, myctrl.ubvec); - - if (sr == 1) { - /*******************************************/ - /* Half of the processors do scratch-remap */ - 
/*******************************************/ - ngroups = amax(amin(RIP_SPLIT_FACTOR, npes), 1); - MPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm); - MPI_Comm_rank(srcomm, &srmype); - MPI_Comm_size(srcomm, &srnpes); - - moptions[0] = 0; - moptions[7] = ctrl->sync + (mype % ngroups) + 1; - - idxset(nvtxs, 0, lwhere); + gkMPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm); + gkMPI_Comm_rank(ipcomm, &mype); + gkMPI_Comm_size(ipcomm, &npes); + + if (sr == 1) { /* Half of the processors do scratch-remap */ + ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, npes), 1); + gkMPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm); + gkMPI_Comm_rank(srcomm, &srmype); + gkMPI_Comm_size(srcomm, &srnpes); + + METIS_SetDefaultOptions(moptions); + moptions[METIS_OPTION_SEED] = ctrl->sync + (mype % ngroups) + 1; + + tpwgts = ctrl->tpwgts; + tpwgts2 = rwspacemalloc(ctrl, 2*ncon); + + iset(nvtxs, 0, lwhere); lnparts = ctrl->nparts; fpart = fpe = 0; lnpes = srnpes; while (lnpes > 1 && lnparts > 1) { - ASSERT(ctrl, agraph->nvtxs > 1); - /* Determine the weights of the partitions */ - mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart); - mytpwgts2[1] = 1.0-mytpwgts2[0]; - - - if (agraph->ncon == 1) { - METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, - agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, - part); - } - else { - METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, - agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, - moptions, &edgecut, part); + PASSERT(ctrl, agraph->nvtxs > 1); + /* determine the weights of the two partitions as a function of the + weight of the target partition weights */ + for (j=(lnparts>>1), i=0; invtxs, &ncon, agraph->xadj, + agraph->adjncy, agraph->vwgt, NULL, agraph->adjwgt, + &twoparts, tpwgts2, NULL, moptions, &edgecut, part); - /* I'm picking the left branch */ + /* pick one of the branches */ if (srmype < fpe+lnpes/2) { - Mc_KeepPart(agraph, wspace, part, 
0); - lnpes = lnpes/2; + KeepPart(ctrl, agraph, part, 0); + lnpes = lnpes/2; lnparts = lnparts/2; } else { - Mc_KeepPart(agraph, wspace, part, 1); - fpart = fpart + lnparts/2; - fpe = fpe + lnpes/2; - lnpes = lnpes - lnpes/2; + KeepPart(ctrl, agraph, part, 1); + fpart = fpart + lnparts/2; + fpe = fpe + lnpes/2; + lnpes = lnpes - lnpes/2; lnparts = lnparts - lnparts/2; } } - /* In case srnpes is greater than or equal to nparts */ - if (lnparts == 1) { + if (lnparts == 1) { /* Case in which srnpes is greater or equal to nparts */ /* Only the first process will assign labels (for the reduction to work) */ if (srmype == fpe) { for (i=0; invtxs; i++) lwhere[agraph->label[i]] = fpart; } } - /* In case srnpes is smaller than nparts */ - else { - if (ncon == 1) - METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, - agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, - &edgecut, part); - else - METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, - agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, - moptions, &edgecut, part); + else { /* Case in which srnpes is smaller than nparts */ + /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */ + tpwgts = rwspacemalloc(ctrl, lnparts*ncon); + for (j=0; jtpwgts[(fpart+i)*ncon+j]; + wsum += tpwgts[i*ncon+j]; + } + for (wsum=1.0/wsum, i=0; invtxs, &ncon, agraph->xadj, agraph->adjncy, + agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, + &edgecut, part); for (i=0; invtxs; i++) lwhere[agraph->label[i]] = fpart + part[i]; } - MPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_DATATYPE, MPI_SUM, srcomm); + gkMPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_T, MPI_SUM, srcomm); edgecut = ComputeSerialEdgeCut(&cgraph); - Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec); - lbsum = ssum(ncon, lbvec); - MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ipcomm); - 
MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm); + ComputeSerialBalance(ctrl, &cgraph, part, lbvec); + lbsum = rsum(ncon, lbvec, 1); + gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, IDX_T, MPI_MAX, ipcomm); + gkMPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ipcomm); lpecost.rank = ctrl->mype; lpecost.cost = lbsum; - if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { - if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) - lpecost.cost = (float)edgecut; + if (min_lbsum < UNBALANCE_FRACTION * (real_t)(ncon)) { + if (lbsum < UNBALANCE_FRACTION * (real_t)(ncon)) + lpecost.cost = (double)edgecut; else - lpecost.cost = (float)max_cut + lbsum; + lpecost.cost = (double)max_cut + lbsum; } - MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm); + gkMPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_DOUBLE_INT, + MPI_MINLOC, ipcomm); - if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) { - MPI_Send((void *)part, nvtxs, IDX_DATATYPE, sr_pe, 1, ctrl->comm); - } + if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) + gkMPI_Send((void *)part, nvtxs, IDX_T, sr_pe, 1, ctrl->comm); - if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) { - MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status); - } + if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) + gkMPI_Recv((void *)part, nvtxs, IDX_T, gpecost.rank, 1, ctrl->comm, &status); if (ctrl->mype == sr_pe) { - idxcopy(nvtxs, part, lwhere); - SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts); + icopy(nvtxs, part, lwhere); + SerialRemap(ctrl, &cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts); } - MPI_Comm_free(&srcomm); + gkMPI_Comm_free(&srcomm); } - /**************************************/ - /* The other half do global diffusion */ - /**************************************/ - else { - /******************************************************************/ - /* The next 
stmt is required to balance out the sr MPI_Comm_split */ - /******************************************************************/ - MPI_Comm_split(ipcomm, MPI_UNDEFINED, 0, &srcomm); + else { /* The other half do global diffusion */ + /* setup a ctrl for the diffusion */ + myctrl = (ctrl_t *)gk_malloc(sizeof(ctrl_t), "myctrl"); + memset(myctrl, 0, sizeof(ctrl_t)); + myctrl->mype = mype; + myctrl->npes = npes; + myctrl->comm = ipcomm; + myctrl->sync = ctrl->sync; + myctrl->seed = ctrl->seed; + myctrl->nparts = ctrl->nparts; + myctrl->ncon = ctrl->ncon; + myctrl->ipc_factor = ctrl->ipc_factor; + myctrl->redist_factor = ctrl->redist_base; + myctrl->partType = ADAPTIVE_PARTITION; + myctrl->ps_relation = PARMETIS_PSR_UNCOUPLED; + myctrl->tpwgts = rmalloc(myctrl->nparts*myctrl->ncon, "myctrl->tpwgts"); + myctrl->ubvec = rmalloc(myctrl->ncon, "myctrl->ubvec"); + myctrl->invtvwgts = rmalloc(myctrl->ncon, "myctrl->invtvwgts"); + + rcopy(myctrl->nparts*myctrl->ncon, ctrl->tpwgts, myctrl->tpwgts); + rcopy(myctrl->ncon, ctrl->ubvec, myctrl->ubvec); + rcopy(myctrl->ncon, ctrl->invtvwgts, myctrl->invtvwgts); + + AllocateWSpace(myctrl, 10*agraph->nvtxs); + AllocateRefinementWorkSpace(myctrl, agraph->nvtxs); + + /* This stmt is required to balance out the sr gkMPI_Comm_split */ + gkMPI_Comm_split(ipcomm, MPI_UNDEFINED, 0, &srcomm); if (ncon == 1) { - rating = WavefrontDiffusion(&myctrl, agraph, home); - Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec); - lbsum = ssum(ncon, lbvec); + rating = WavefrontDiffusion(myctrl, agraph, home); + ComputeSerialBalance(ctrl, &cgraph, part, lbvec); + lbsum = rsum(ncon, lbvec, 1); /* Determine which PE computed the best partitioning */ - MPI_Allreduce((void *)&rating, (void *)&max_rating, 1, MPI_FLOAT, MPI_MAX, ipcomm); - MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm); + gkMPI_Allreduce((void *)&rating, (void *)&max_rating, 1, REAL_T, MPI_MAX, ipcomm); + gkMPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, 
REAL_T, MPI_MIN, ipcomm); lpecost.rank = ctrl->mype; lpecost.cost = lbsum; - if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { - if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) + if (min_lbsum < UNBALANCE_FRACTION * (real_t)(ncon)) { + if (lbsum < UNBALANCE_FRACTION * (real_t)(ncon)) lpecost.cost = rating; else lpecost.cost = max_rating + lbsum; } - MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm); + gkMPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_DOUBLE_INT, + MPI_MINLOC, ipcomm); /* Now send this to the coordinating processor */ if (ctrl->mype == gpecost.rank && ctrl->mype != gd_pe) - MPI_Send((void *)part, nvtxs, IDX_DATATYPE, gd_pe, 1, ctrl->comm); + gkMPI_Send((void *)part, nvtxs, IDX_T, gd_pe, 1, ctrl->comm); if (ctrl->mype != gpecost.rank && ctrl->mype == gd_pe) - MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status); + gkMPI_Recv((void *)part, nvtxs, IDX_T, gpecost.rank, 1, ctrl->comm, &status); if (ctrl->mype == gd_pe) { - idxcopy(nvtxs, part, lwhere); - SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts); + icopy(nvtxs, part, lwhere); + SerialRemap(ctrl, &cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts); } } else { - Mc_Diffusion(&myctrl, agraph, graph->vtxdist, agraph->where, home, wspace, N_MOC_GD_PASSES); + Mc_Diffusion(myctrl, agraph, graph->vtxdist, agraph->where, home, + N_MOC_GD_PASSES); } + + FreeCtrl(&myctrl); } + if (graph->ncon <= MAX_NCON_FOR_DIFFUSION) { if (ctrl->mype == sr_pe || ctrl->mype == gd_pe) { /********************************************************************/ /* The coordinators from each group decide on the best partitioning */ /********************************************************************/ - my_cut = (float) ComputeSerialEdgeCut(&cgraph); - my_totalv = (float) Mc_ComputeSerialTotalV(&cgraph, home); - Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec); - my_balance = ssum(cgraph.ncon, lbvec); - my_balance /= (float) 
cgraph.ncon; + my_cut = (real_t) ComputeSerialEdgeCut(&cgraph); + my_totalv = (real_t) Mc_ComputeSerialTotalV(&cgraph, home); + ComputeSerialBalance(ctrl, &cgraph, part, lbvec); + my_balance = rsum(cgraph.ncon, lbvec, 1); + my_balance /= (real_t) cgraph.ncon; my_cost = ctrl->ipc_factor * my_cut + REDIST_WGT * ctrl->redist_base * my_totalv; - IFSET(ctrl->dbglvl, DBG_REFINEINFO, printf("%s initial cut: %.1f, totalv: %.1f, balance: %.3f\n", - (ctrl->mype == sr_pe ? "scratch-remap" : "diffusion"), my_cut, my_totalv, my_balance)); + IFSET(ctrl->dbglvl, DBG_REFINEINFO, + printf("%s initial cut: %.1"PRREAL", totalv: %.1"PRREAL", balance: %.3"PRREAL"\n", + (ctrl->mype == sr_pe ? "scratch-remap" : "diffusion"), + my_cut, my_totalv, my_balance)); if (ctrl->mype == gd_pe) { buffer[0] = my_cost; buffer[1] = my_balance; - MPI_Send((void *)buffer, 2, MPI_FLOAT, sr_pe, 1, ctrl->comm); + gkMPI_Send((void *)buffer, 2, REAL_T, sr_pe, 1, ctrl->comm); } else { - MPI_Recv((void *)buffer, 2, MPI_FLOAT, gd_pe, 1, ctrl->comm, &status); - your_cost = buffer[0]; + gkMPI_Recv((void *)buffer, 2, REAL_T, gd_pe, 1, ctrl->comm, &status); + your_cost = buffer[0]; your_balance = buffer[1]; } } @@ -335,41 +337,41 @@ void Balance_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) } } - MPI_Bcast((void *)&who_wins, 1, MPI_INT, sr_pe, ctrl->comm); + gkMPI_Bcast((void *)&who_wins, 1, IDX_T, sr_pe, ctrl->comm); } else { who_wins = sr_pe; } - MPI_Bcast((void *)part, nvtxs, IDX_DATATYPE, who_wins, ctrl->comm); - idxcopy(graph->nvtxs, part+vtxdist[ctrl->mype], graph->where); - - MPI_Comm_free(&ipcomm); - GKfree((void **)&xadj, (void **)&mytpwgts, LTERM); + gkMPI_Bcast((void *)part, nvtxs, IDX_T, who_wins, ctrl->comm); + icopy(graph->nvtxs, part+vtxdist[ctrl->mype], graph->where); - GKfree((void **)&agraph->xadj, (void **)&agraph->adjncy, (void **)&agraph->vwgt, - (void **)&agraph->nvwgt, LTERM); - GKfree((void **)&agraph->vsize, (void **)&agraph->adjwgt, (void **)&agraph->label, LTERM); - 
GKfree((void **)&agraph, LTERM); + gkMPI_Comm_free(&ipcomm); + agraph->where = NULL; + FreeGraph(agraph); + + WCOREPOP; IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); } -/* NOTE: this subroutine should work for static, adaptive, single-, and multi-contraint */ -/************************************************************************* -* This function assembles the graph into a single processor -**************************************************************************/ -GraphType *Mc_AssembleAdaptiveGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +/*************************************************************************/ +/*! This function assembles the graph into a single processor. It should + work for static, adaptive, single-, and multi-contraint */ +/*************************************************************************/ +graph_t *AssembleAdaptiveGraph(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, l, gnvtxs, nvtxs, ncon, gnedges, nedges, gsize; - idxtype *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *vtxdist, *imap; - idxtype *axadj, *aadjncy, *aadjwgt, *avwgt, *avsize = NULL, *alabel; - idxtype *mygraph, *ggraph; - int *rcounts, *rdispls, mysize; - float *anvwgt; - GraphType *agraph; + idx_t i, j, k, l, gnvtxs, nvtxs, ncon, gnedges, nedges, gsize; + idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *vtxdist, *imap; + idx_t *axadj, *aadjncy, *aadjwgt, *avwgt, *avsize = NULL, *alabel; + idx_t *mygraph, *ggraph; + idx_t *rcounts, *rdispls, mysize; + real_t *anvwgt; + graph_t *agraph; + + WCOREPUSH; gnvtxs = graph->gnvtxs; nvtxs = graph->nvtxs; @@ -384,9 +386,9 @@ GraphType *Mc_AssembleAdaptiveGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceT imap = graph->imap; /*************************************************************/ - /* Determine the # of idxtype to receive from each processor */ + /* Determine the # of idx_t to receive from each processor */ /*************************************************************/ - rcounts = 
imalloc(ctrl->npes, "AssembleGraph: rcounts"); + rcounts = iwspacemalloc(ctrl, ctrl->npes); switch (ctrl->partType) { case STATIC_PARTITION: mysize = (1+ncon)*nvtxs + 2*nedges; @@ -396,18 +398,23 @@ GraphType *Mc_AssembleAdaptiveGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceT mysize = (2+ncon)*nvtxs + 2*nedges; break; default: - printf("WARNING: bad value for ctrl->partType %d\n", ctrl->partType); + printf("WARNING: bad value for ctrl->partType %"PRIDX"\n", ctrl->partType); break; } - MPI_Allgather((void *)(&mysize), 1, MPI_INT, (void *)rcounts, 1, MPI_INT, ctrl->comm); + gkMPI_Allgather((void *)(&mysize), 1, IDX_T, (void *)rcounts, 1, IDX_T, ctrl->comm); - rdispls = imalloc(ctrl->npes+1, "AssembleGraph: rdispls"); - rdispls[0] = 0; - for (i=1; inpes+1; i++) + rdispls = iwspacemalloc(ctrl, ctrl->npes+1); + for (rdispls[0]=0, i=1; inpes+1; i++) rdispls[i] = rdispls[i-1] + rcounts[i-1]; + /* allocate memory for the recv buffer of the assembled graph */ + gsize = rdispls[ctrl->npes]; + ggraph = iwspacemalloc(ctrl, gsize); + /* Construct the one-array storage format of the assembled graph */ - mygraph = (mysize <= wspace->maxcore ? wspace->core : idxmalloc(mysize, "AssembleGraph: mygraph")); + WCOREPUSH; /* for freeing mygraph */ + mygraph = iwspacemalloc(ctrl, mysize); + for (k=i=0; inpes]; - ggraph = (gsize <= wspace->maxcore-mysize ? 
wspace->core+mysize : idxmalloc(gsize, "AssembleGraph: ggraph")); - MPI_Allgatherv((void *)mygraph, mysize, IDX_DATATYPE, (void *)ggraph, rcounts, rdispls, IDX_DATATYPE, ctrl->comm); + gkMPI_Allgatherv((void *)mygraph, mysize, IDX_T, (void *)ggraph, + rcounts, rdispls, IDX_T, ctrl->comm); + + WCOREPOP; /* free mygraph */ - GKfree((void **)&rcounts, (void **)&rdispls, LTERM); - if (mysize > wspace->maxcore) - GKfree((void **)&mygraph, LTERM); agraph = CreateGraph(); agraph->nvtxs = gnvtxs; + agraph->ncon = ncon; + switch (ctrl->partType) { case STATIC_PARTITION: agraph->nedges = gnedges = (gsize-(1+ncon)*gnvtxs)/2; @@ -443,24 +450,23 @@ GraphType *Mc_AssembleAdaptiveGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceT agraph->nedges = gnedges = (gsize-(2+ncon)*gnvtxs)/2; break; default: - printf("WARNING: bad value for ctrl->partType %d\n", ctrl->partType); + printf("WARNING: bad value for ctrl->partType %"PRIDX"\n", ctrl->partType); agraph->nedges = gnedges = -1; break; } - agraph->ncon = ncon; /*******************************************/ /* Allocate memory for the assembled graph */ /*******************************************/ - axadj = agraph->xadj = idxmalloc(gnvtxs+1, "AssembleGraph: axadj"); - avwgt = agraph->vwgt = idxmalloc(gnvtxs*ncon, "AssembleGraph: avwgt"); - anvwgt = agraph->nvwgt = fmalloc(gnvtxs*ncon, "AssembleGraph: anvwgt"); - aadjncy = agraph->adjncy = idxmalloc(gnedges, "AssembleGraph: adjncy"); - aadjwgt = agraph->adjwgt = idxmalloc(gnedges, "AssembleGraph: adjwgt"); - alabel = agraph->label = idxmalloc(gnvtxs, "AssembleGraph: alabel"); + axadj = agraph->xadj = imalloc(gnvtxs+1, "AssembleGraph: axadj"); + avwgt = agraph->vwgt = imalloc(gnvtxs*ncon, "AssembleGraph: avwgt"); + anvwgt = agraph->nvwgt = rmalloc(gnvtxs*ncon, "AssembleGraph: anvwgt"); + aadjncy = agraph->adjncy = imalloc(gnedges, "AssembleGraph: adjncy"); + aadjwgt = agraph->adjwgt = imalloc(gnedges, "AssembleGraph: adjwgt"); + alabel = agraph->label = imalloc(gnvtxs, "AssembleGraph: 
alabel"); if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) - avsize = agraph->vsize = idxmalloc(gnvtxs, "AssembleGraph: avsize"); + avsize = agraph->vsize = imalloc(gnvtxs, "AssembleGraph: avsize"); for (k=j=i=0; ivwgt[i*ncon+j]) / (float)(ctrl->tvwgts[j]); + anvwgt[i*ncon+j] = ctrl->invtvwgts[j]*agraph->vwgt[i*ncon+j]; + } - for (i=0; i wspace->maxcore-mysize) - GKfree((void **)&ggraph, LTERM); + WCOREPOP; return agraph; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initmsection.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initmsection.c index 3699f1b2..ce9cb140 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initmsection.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initmsection.c @@ -8,7 +8,7 @@ * Started 6/3/97 * George * - * $Id: initmsection.c,v 1.2 2003/07/21 17:18:49 karypis Exp $ + * $Id: initmsection.c 10361 2011-06-21 19:16:22Z karypis $ */ #include @@ -17,7 +17,6 @@ #define DEBUG_IPART_ - /************************************************************************************/ /*! The entry point of the algorithm that finds the separators of the coarsest graph. 
@@ -42,65 +41,49 @@ */ /************************************************************************************/ -void InitMultisection(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void InitMultisection(ctrl_t *ctrl, graph_t *graph) { - int i, lpecut[2], gpecut[2], mypart, moptions[10]; - idxtype *vtxdist, *gwhere = NULL, *part, *label; - GraphType *agraph; - int *sendcounts, *displs; + idx_t i, myrank, mypart, options[METIS_NOPTIONS]; + idx_t *vtxdist, *gwhere = NULL, *part, *label; + graph_t *agraph; + idx_t *sendcounts, *displs; MPI_Comm newcomm, labelcomm; + struct { + double cut; + int rank; + } lpecut, gpecut; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); + WCOREPUSH; /* Assemble the graph and do the necessary pre-processing */ - agraph = AssembleMultisectedGraph(ctrl, graph, wspace); - part = agraph->where; + agraph = AssembleMultisectedGraph(ctrl, graph); + part = agraph->where; agraph->where = NULL; /* Split the processors into groups so that each one can do a bisection */ mypart = ctrl->mype%(ctrl->nparts/2); - MPI_Comm_split(ctrl->comm, mypart, 0, &newcomm); + gkMPI_Comm_split(ctrl->comm, mypart, 0, &newcomm); + gkMPI_Comm_rank(newcomm, &myrank); /* Each processor keeps the graph that it only needs and bisects it */ - agraph->ncon = 1; /* needed for Mc_KeepPart */ - Mc_KeepPart(agraph, wspace, part, mypart); + KeepPart(ctrl, agraph, part, mypart); label = agraph->label; /* Save this because ipart may need it */ agraph->label = NULL; /* Bisect the graph and construct the separator */ - switch (ctrl->ipart) { - case ISEP_EDGE: - moptions[0] = 1; - moptions[1] = 3; - moptions[2] = 1; - moptions[3] = 1; - moptions[4] = 0; - moptions[7] = (ctrl->mype+8)*101; - - agraph->where = idxmalloc(agraph->nvtxs, "InitMultisection: agraph->where"); - - METIS_EdgeComputeSeparator(&agraph->nvtxs, agraph->xadj, agraph->adjncy, - agraph->vwgt, agraph->adjwgt, moptions, &agraph->mincut, agraph->where); - break; - case ISEP_NODE: - moptions[0] = 1; - 
moptions[1] = 3; - moptions[2] = 1; - moptions[3] = 2; - moptions[4] = 0; - moptions[7] = (ctrl->mype+8)*101; - - agraph->where = idxmalloc(agraph->nvtxs, "InitMultisection: agraph->where"); - - METIS_NodeComputeSeparator(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, - agraph->adjwgt, &ctrl->ubfrac, moptions, &agraph->mincut, agraph->where); - break; - default: - errexit("Unknown ISEP type!\n"); - } + METIS_SetDefaultOptions(options); + options[METIS_OPTION_SEED] = (ctrl->mype+8)*101; + options[METIS_OPTION_NSEPS] = 5; + options[METIS_OPTION_UFACTOR] = (idx_t)(1000.0*(ctrl->ubfrac - 1.0)); + + WCOREPUSH; /* for freeing agraph->where and gwhere */ + agraph->where = iwspacemalloc(ctrl, agraph->nvtxs); + METIS_ComputeVertexSeparator(&agraph->nvtxs, agraph->xadj, agraph->adjncy, + agraph->vwgt, options, &agraph->mincut, agraph->where); for (i=0; invtxs; i++) { - ASSERT(ctrl, agraph->where[i]>=0 && agraph->where[i]<=2); + PASSERT(ctrl, agraph->where[i]>=0 && agraph->where[i]<=2); if (agraph->where[i] == 2) agraph->where[i] = ctrl->nparts+2*mypart; else @@ -108,79 +91,80 @@ void InitMultisection(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) } /* Determine which PE got the minimum cut */ - lpecut[0] = agraph->mincut; - MPI_Comm_rank(newcomm, lpecut+1); - MPI_Allreduce(lpecut, gpecut, 1, MPI_2INT, MPI_MINLOC, newcomm); + lpecut.cut = agraph->mincut; + lpecut.rank = myrank; + gkMPI_Allreduce(&lpecut, &gpecut, 1, MPI_DOUBLE_INT, MPI_MINLOC, newcomm); - /* myprintf(ctrl, "Nvtxs: %d, Mincut: %d, GMincut: %d, %d\n", agraph->nvtxs, - agraph->mincut, gpecut[0], gpecut[1]); */ + /* myprintf(ctrl, "Nvtxs: %"PRIDX", Mincut: %"PRIDX", GMincut: %"PRIDX", %"PRIDX"\n", agraph->nvtxs, + agraph->mincut, (idx_t)gpecut.cut, (idx_t)gpecut.rank); */ /* Send the best where to the root processor of this partition */ - if (lpecut[1] == gpecut[1] && gpecut[1] != 0) - MPI_Send((void *)agraph->where, agraph->nvtxs, IDX_DATATYPE, 0, 1, newcomm); - if (lpecut[1] == 0 && 
gpecut[1] != 0) - MPI_Recv((void *)agraph->where, agraph->nvtxs, IDX_DATATYPE, gpecut[1], 1, newcomm, &ctrl->status); + if (myrank != 0 && myrank == gpecut.rank) + gkMPI_Send((void *)agraph->where, agraph->nvtxs, IDX_T, 0, 1, newcomm); + if (myrank == 0 && myrank != gpecut.rank) + gkMPI_Recv((void *)agraph->where, agraph->nvtxs, IDX_T, gpecut.rank, 1, + newcomm, &ctrl->status); /* Create a communicator that stores all the i-th processors of the newcomm */ - MPI_Comm_split(ctrl->comm, lpecut[1], 0, &labelcomm); + gkMPI_Comm_split(ctrl->comm, myrank, 0, &labelcomm); /* Map the separator back to agraph. This is inefficient! */ - if (lpecut[1] == 0) { - gwhere = idxsmalloc(graph->gnvtxs, 0, "InitMultisection: gwhere"); + if (myrank == 0) { + gwhere = iset(graph->gnvtxs, 0, iwspacemalloc(ctrl, graph->gnvtxs)); for (i=0; invtxs; i++) gwhere[label[i]] = agraph->where[i]; - } - GKfree((void **)&agraph->where, LTERM); - agraph->where = part; - - if (lpecut[1] == 0) { - MPI_Reduce((void *)gwhere, (void *)agraph->where, graph->gnvtxs, IDX_DATATYPE, + gkMPI_Reduce((void *)gwhere, (void *)part, graph->gnvtxs, IDX_T, MPI_SUM, 0, labelcomm); - GKfree((void **)&gwhere, LTERM); } + WCOREPOP; /* free agraph->where & gwhere */ + + agraph->where = part; + /* The minimum PE performs the Scatter */ vtxdist = graph->vtxdist; - ASSERT(ctrl, graph->where != NULL); - GKfree((void **)&graph->where, LTERM); /* Remove the propagated down where info */ - graph->where = idxmalloc(graph->nvtxs+graph->nrecv, "InitPartition: where"); + PASSERT(ctrl, graph->where != NULL); + gk_free((void **)&graph->where, LTERM); /* Remove the propagated down where info */ + graph->where = imalloc(graph->nvtxs+graph->nrecv, "InitPartition: where"); - sendcounts = imalloc(ctrl->npes, "InitPartitionNew: sendcounts"); - displs = imalloc(ctrl->npes, "InitPartitionNew: displs"); + sendcounts = iwspacemalloc(ctrl, ctrl->npes); + displs = iwspacemalloc(ctrl, ctrl->npes); for (i=0; inpes; i++) { sendcounts[i] = 
vtxdist[i+1]-vtxdist[i]; displs[i] = vtxdist[i]; } - MPI_Scatterv((void *)agraph->where, sendcounts, displs, IDX_DATATYPE, - (void *)graph->where, graph->nvtxs, IDX_DATATYPE, 0, ctrl->comm); - - GKfree((void **)&sendcounts, (void **)&displs, (void **)&label, LTERM); + gkMPI_Scatterv((void *)agraph->where, sendcounts, displs, IDX_T, + (void *)graph->where, graph->nvtxs, IDX_T, 0, ctrl->comm); + agraph->label = label; FreeGraph(agraph); - MPI_Comm_free(&newcomm); - MPI_Comm_free(&labelcomm); + gkMPI_Comm_free(&newcomm); + gkMPI_Comm_free(&labelcomm); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); + WCOREPOP; + IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); } -/************************************************************************* -* This function assembles the graph into a single processor -**************************************************************************/ -GraphType *AssembleMultisectedGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +/*************************************************************************/ +/*! 
This function assembles the graph into a single processor */ +/*************************************************************************/ +graph_t *AssembleMultisectedGraph(ctrl_t *ctrl, graph_t *graph) { - int i, j, k, l, gnvtxs, nvtxs, gnedges, nedges, gsize; - idxtype *xadj, *vwgt, *where, *adjncy, *adjwgt, *vtxdist, *imap; - idxtype *axadj, *aadjncy, *aadjwgt, *avwgt, *awhere, *alabel; - idxtype *mygraph, *ggraph; - int *recvcounts, *displs, mysize; - GraphType *agraph; + idx_t i, j, k, l, gnvtxs, nvtxs, gnedges, nedges, gsize; + idx_t *xadj, *vwgt, *where, *adjncy, *adjwgt, *vtxdist, *imap; + idx_t *axadj, *aadjncy, *aadjwgt, *avwgt, *awhere, *alabel; + idx_t *mygraph, *ggraph; + idx_t *recvcounts, *displs, mysize; + graph_t *agraph; + + WCOREPUSH; gnvtxs = graph->gnvtxs; nvtxs = graph->nvtxs; @@ -193,18 +177,23 @@ GraphType *AssembleMultisectedGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceT vtxdist = graph->vtxdist; imap = graph->imap; - /* Determine the # of idxtype to receive from each processor */ - recvcounts = imalloc(ctrl->npes, "AssembleGraph: recvcounts"); + /* Determine the # of idx_t to receive from each processor */ + recvcounts = iwspacemalloc(ctrl, ctrl->npes); mysize = 3*nvtxs + 2*nedges; - MPI_Allgather((void *)(&mysize), 1, MPI_INT, (void *)recvcounts, 1, MPI_INT, ctrl->comm); + gkMPI_Allgather((void *)(&mysize), 1, IDX_T, (void *)recvcounts, 1, IDX_T, ctrl->comm); - displs = imalloc(ctrl->npes+1, "AssembleGraph: displs"); - displs[0] = 0; - for (i=1; inpes+1; i++) + displs = iwspacemalloc(ctrl, ctrl->npes+1); + for (displs[0]=0, i=1; inpes+1; i++) displs[i] = displs[i-1] + recvcounts[i-1]; + /* allocate memory for the recv buffer of the assembled graph */ + gsize = displs[ctrl->npes]; + ggraph = iwspacemalloc(ctrl, gsize); + /* Construct the one-array storage format of the assembled graph */ - mygraph = (mysize <= wspace->maxcore ? 
wspace->core : idxmalloc(mysize, "AssembleGraph: mygraph")); + WCOREPUSH; /* for freeing mygraph */ + mygraph = iwspacemalloc(ctrl, mysize); + for (k=i=0; inpes]; - ggraph = (gsize <= wspace->maxcore-mysize ? wspace->core+mysize : idxmalloc(gsize, "AssembleGraph: ggraph")); - MPI_Allgatherv((void *)mygraph, mysize, IDX_DATATYPE, (void *)ggraph, recvcounts, displs, IDX_DATATYPE, ctrl->comm); + gkMPI_Allgatherv((void *)mygraph, mysize, IDX_T, (void *)ggraph, + recvcounts, displs, IDX_T, ctrl->comm); + + WCOREPOP; /* free mygraph */ - GKfree((void **)&recvcounts, (void **)&displs, LTERM); - if (mysize > wspace->maxcore) - GKfree((void **)&mygraph, LTERM); agraph = CreateGraph(); - agraph->nvtxs = gnvtxs; + agraph->nvtxs = gnvtxs; + agraph->ncon = 1; agraph->nedges = gnedges = (gsize-3*gnvtxs)/2; /* Allocate memory for the assembled graph */ - axadj = agraph->xadj = idxmalloc(gnvtxs+1, "AssembleGraph: axadj"); - avwgt = agraph->vwgt = idxmalloc(gnvtxs, "AssembleGraph: avwgt"); - awhere = agraph->where = idxmalloc(gnvtxs, "AssembleGraph: awhere"); - aadjncy = agraph->adjncy = idxmalloc(gnedges, "AssembleGraph: adjncy"); - aadjwgt = agraph->adjwgt = idxmalloc(gnedges, "AssembleGraph: adjwgt"); - alabel = agraph->label = idxmalloc(gnvtxs, "AssembleGraph: alabel"); + axadj = agraph->xadj = imalloc(gnvtxs+1, "AssembleGraph: axadj"); + avwgt = agraph->vwgt = imalloc(gnvtxs, "AssembleGraph: avwgt"); + awhere = agraph->where = imalloc(gnvtxs, "AssembleGraph: awhere"); + aadjncy = agraph->adjncy = imalloc(gnedges, "AssembleGraph: adjncy"); + aadjwgt = agraph->adjwgt = imalloc(gnedges, "AssembleGraph: adjwgt"); + alabel = agraph->label = imalloc(gnvtxs, "AssembleGraph: alabel"); for (k=j=i=0; i wspace->maxcore-mysize) - GKfree((void **)&ggraph, LTERM); + WCOREPOP; return agraph; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initpart.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initpart.c index 3758435a..aa609a22 
100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initpart.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/initpart.c @@ -9,7 +9,7 @@ * Started 3/4/96 * George * - * $Id: initpart.c,v 1.2 2003/07/21 17:18:49 karypis Exp $ + * $Id: initpart.c 10542 2011-07-11 16:56:22Z karypis $ */ #include @@ -25,196 +25,204 @@ * This algorithm assembles the graph to all the processors and preceeds * by parallelizing the recursive bisection step. **************************************************************************/ -void Mc_InitPartition_RB(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void InitPartition(ctrl_t *ctrl, graph_t *graph) { - int i, j; - int ncon, mype, npes, gnvtxs, ngroups; - idxtype *xadj, *adjncy, *adjwgt, *vwgt; - idxtype *part, *gwhere0, *gwhere1; - idxtype *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt; - GraphType *agraph; - int lnparts, fpart, fpe, lnpes; - int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut; - float *mytpwgts, mytpwgts2[2], lbvec[MAXNCON], lbsum, min_lbsum, wsum; + idx_t i, j, ncon, mype, npes, gnvtxs, ngroups; + idx_t *xadj, *adjncy, *adjwgt, *vwgt; + idx_t *part, *gwhere0, *gwhere1; + idx_t *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt; + graph_t *agraph; + idx_t lnparts, fpart, fpe, lnpes; + idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut; + real_t *tpwgts, *tpwgts2, *lbvec, lbsum, min_lbsum, wsum; MPI_Comm ipcomm; struct { - float sum; + double sum; int rank; } lpesum, gpesum; - ncon = graph->ncon; - ngroups = amax(amin(RIP_SPLIT_FACTOR, ctrl->npes), 1); + WCOREPUSH; - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); + ncon = graph->ncon; - agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace); - part = idxmalloc(agraph->nvtxs, "Mc_IP_RB: part"); - xadj = idxmalloc(agraph->nvtxs+1, "Mc_IP_RB: xadj"); - adjncy = idxmalloc(agraph->nedges, 
"Mc_IP_RB: adjncy"); - adjwgt = idxmalloc(agraph->nedges, "Mc_IP_RB: adjwgt"); - vwgt = idxmalloc(agraph->nvtxs*ncon, "Mc_IP_RB: vwgt"); + ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, ctrl->npes), 1); - idxcopy(agraph->nvtxs*ncon, agraph->vwgt, vwgt); - idxcopy(agraph->nvtxs+1, agraph->xadj, xadj); - idxcopy(agraph->nedges, agraph->adjncy, adjncy); - idxcopy(agraph->nedges, agraph->adjwgt, adjwgt); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); - MPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm); - MPI_Comm_rank(ipcomm, &mype); - MPI_Comm_size(ipcomm, &npes); + lbvec = rwspacemalloc(ctrl, ncon); + /* assemble the graph to all the processors */ + agraph = AssembleAdaptiveGraph(ctrl, graph); gnvtxs = agraph->nvtxs; - gwhere0 = idxsmalloc(gnvtxs, 0, "Mc_IP_RB: gwhere0"); - gwhere1 = idxmalloc(gnvtxs, "Mc_IP_RB: gwhere1"); + /* make a copy of the graph's structure for later */ + xadj = icopy(gnvtxs+1, agraph->xadj, iwspacemalloc(ctrl, gnvtxs+1)); + vwgt = icopy(gnvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, gnvtxs*ncon)); + adjncy = icopy(agraph->nedges, agraph->adjncy, iwspacemalloc(ctrl, agraph->nedges)); + adjwgt = icopy(agraph->nedges, agraph->adjwgt, iwspacemalloc(ctrl, agraph->nedges)); + part = iwspacemalloc(ctrl, gnvtxs); + + /* create different processor groups */ + gkMPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm); + gkMPI_Comm_rank(ipcomm, &mype); + gkMPI_Comm_size(ipcomm, &npes); - /* ADD: this assumes that tpwgts for all constraints is the same */ - /* ADD: this is necessary because serial metis does not support the general case */ - mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts"); - for (i=0; inparts; i++) - for (j=0; jtpwgts[i*ncon+j]; - for (i=0; inparts; i++) - mytpwgts[i] /= (float)ncon; /* Go into the recursive bisection */ - /* ADD: consider changing this to breadth-first type bisection */ - moptions[0] = 0; - moptions[7] = ctrl->sync + (ctrl->mype 
% ngroups) + 1; + METIS_SetDefaultOptions(moptions); + moptions[METIS_OPTION_SEED] = ctrl->sync + (ctrl->mype % ngroups) + 1; + if (ctrl->fast) { + moptions[METIS_OPTION_NITER] = 1; + moptions[METIS_OPTION_NIPARTS] = 1; + moptions[METIS_OPTION_DROPEDGES] = 1; + moptions[METIS_OPTION_ONDISK] = 1; + //moptions[METIS_OPTION_NO2HOP] = 0; + } + + tpwgts = ctrl->tpwgts; + tpwgts2 = rwspacemalloc(ctrl, 2*ncon); lnparts = ctrl->nparts; fpart = fpe = 0; lnpes = npes; while (lnpes > 1 && lnparts > 1) { - /* Determine the weights of the partitions */ - mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart); - mytpwgts2[1] = 1.0-mytpwgts2[0]; - - if (ncon == 1) - METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, - agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, - moptions, &edgecut, part); - else { - METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, - agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, - &twoparts, mytpwgts2, moptions, &edgecut, part); + /* determine the weights of the two partitions as a function of the + weight of the target partition weights */ + for (j=(lnparts>>1), i=0; invtxs, &ncon, agraph->xadj, agraph->adjncy, + agraph->vwgt, NULL, agraph->adjwgt, &twoparts, tpwgts2, NULL, moptions, + &edgecut, part); - /* I'm picking the left branch */ + /* pick one of the branches */ if (mype < fpe+lnpes/2) { - Mc_KeepPart(agraph, wspace, part, 0); - lnpes = lnpes/2; + KeepPart(ctrl, agraph, part, 0); + lnpes = lnpes/2; lnparts = lnparts/2; } else { - Mc_KeepPart(agraph, wspace, part, 1); - fpart = fpart + lnparts/2; - fpe = fpe + lnpes/2; - lnpes = lnpes - lnpes/2; + KeepPart(ctrl, agraph, part, 1); + fpart = fpart + lnparts/2; + fpe = fpe + lnpes/2; + lnpes = lnpes - lnpes/2; lnparts = lnparts - lnparts/2; } } - /* In case npes is greater than or equal to nparts */ - if (lnparts == 1) { + gwhere0 = iset(gnvtxs, 0, iwspacemalloc(ctrl, gnvtxs)); + gwhere1 = iwspacemalloc(ctrl, gnvtxs); + + if (lnparts == 
1) { /* Case npes is greater than or equal to nparts */ /* Only the first process will assign labels (for the reduction to work) */ if (mype == fpe) { for (i=0; invtxs; i++) gwhere0[agraph->label[i]] = fpart; } } - /* In case npes is smaller than nparts */ - else { - if (ncon == 1) - METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, - agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, - moptions, &edgecut, part); - else - METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, - agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, - &lnparts, mytpwgts+fpart, moptions, &edgecut, part); + else { /* Case in which npes is smaller than nparts */ + /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */ + tpwgts = rwspacemalloc(ctrl, lnparts*ncon); + for (j=0; jtpwgts[(fpart+i)*ncon+j]; + wsum += tpwgts[i*ncon+j]; + } + for (wsum=1.0/wsum, i=0; invtxs, &ncon, agraph->xadj, agraph->adjncy, + agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, + &edgecut, part); for (i=0; invtxs; i++) gwhere0[agraph->label[i]] = fpart + part[i]; } - MPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_DATATYPE, MPI_SUM, ipcomm); + gkMPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_T, MPI_SUM, ipcomm); if (ngroups > 1) { - tmpxadj = agraph->xadj; + tmpxadj = agraph->xadj; tmpadjncy = agraph->adjncy; tmpadjwgt = agraph->adjwgt; - tmpvwgt = agraph->vwgt; - tmpwhere = agraph->where; - agraph->xadj = xadj; + tmpvwgt = agraph->vwgt; + tmpwhere = agraph->where; + + agraph->xadj = xadj; agraph->adjncy = adjncy; agraph->adjwgt = adjwgt; - agraph->vwgt = vwgt; - agraph->where = gwhere1; - agraph->vwgt = vwgt; - agraph->nvtxs = gnvtxs; - Mc_ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec); - lbsum = ssum(ncon, lbvec); + agraph->vwgt = vwgt; + agraph->where = gwhere1; + agraph->vwgt = vwgt; + agraph->nvtxs = gnvtxs; edgecut = ComputeSerialEdgeCut(agraph); - MPI_Allreduce((void 
*)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ctrl->gcomm); - MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ctrl->gcomm); + ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec); + lbsum = rsum(ncon, lbvec, 1); + + gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, IDX_T, MPI_MAX, ctrl->gcomm); + gkMPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ctrl->gcomm); lpesum.sum = lbsum; - if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { - if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) - lpesum.sum = (float) (edgecut); + if (min_lbsum < UNBALANCE_FRACTION*ncon) { + if (lbsum < UNBALANCE_FRACTION*ncon) + lpesum.sum = edgecut; else - lpesum.sum = (float) (max_cut); + lpesum.sum = max_cut; } + lpesum.rank = ctrl->mype; - MPI_Comm_rank(ctrl->gcomm, &(lpesum.rank)); - MPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_FLOAT_INT, MPI_MINLOC, ctrl->gcomm); - MPI_Bcast((void *)gwhere1, gnvtxs, IDX_DATATYPE, gpesum.rank, ctrl->gcomm); + gkMPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_DOUBLE_INT, + MPI_MINLOC, ctrl->gcomm); + gkMPI_Bcast((void *)gwhere1, gnvtxs, IDX_T, gpesum.rank, ctrl->gcomm); - agraph->xadj = tmpxadj; + agraph->xadj = tmpxadj; agraph->adjncy = tmpadjncy; agraph->adjwgt = tmpadjwgt; - agraph->vwgt = tmpvwgt; - agraph->where = tmpwhere; + agraph->vwgt = tmpvwgt; + agraph->where = tmpwhere; } - idxcopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where); + icopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where); FreeGraph(agraph); - MPI_Comm_free(&ipcomm); - GKfree((void **)&gwhere0, (void **)&gwhere1, (void **)&mytpwgts, (void **)&part, (void **)&xadj, (void **)&adjncy, (void **)&adjwgt, (void **)&vwgt, LTERM); + gkMPI_Comm_free(&ipcomm); - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); + WCOREPOP; } 
/************************************************************************* * This function keeps one parts **************************************************************************/ -void Mc_KeepPart(GraphType *graph, WorkSpaceType *wspace, idxtype *part, int mypart) +void KeepPart(ctrl_t *ctrl, graph_t *graph, idx_t *part, idx_t mypart) { - int h, i, j, k; - int nvtxs, ncon, mynvtxs, mynedges; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *label; - idxtype *rename; + idx_t h, i, j, k; + idx_t nvtxs, ncon, mynvtxs, mynedges; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label; + idx_t *rename; - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - vwgt = graph->vwgt; + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - label = graph->label; + label = graph->label; - rename = idxmalloc(nvtxs, "Mc_KeepPart: rename"); + rename = iwspacemalloc(ctrl, nvtxs); for (mynvtxs=0, i=0; invtxs = mynvtxs; + graph->nvtxs = mynvtxs; graph->nedges = mynedges; - GKfree((void **)&rename, LTERM); + WCOREPOP; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kmetis.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kmetis.c index 8b25fae5..6b1b8e11 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kmetis.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kmetis.c @@ -3,12 +3,12 @@ * * kmetis.c * - * This is the entry point of Mc_PARMETIS_PartGraphKway + * This is the entry point of ParMETIS_PartKway * * Started 10/19/96 * George * - * $Id: kmetis.c,v 1.9 2003/07/31 16:27:27 karypis Exp $ + * $Id: kmetis.c 10757 2011-09-15 22:07:47Z karypis $ * */ @@ -19,193 +19,140 @@ * This function assumes nothing about the graph distribution. * It is the general case. 
************************************************************************************/ -void ParMETIS_V3_PartKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, int *nparts, - float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, - MPI_Comm *comm) +int ParMETIS_V3_PartKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, + MPI_Comm *comm) { - int h, i; - int nvtxs = -1, npes, mype; - CtrlType ctrl; - WorkSpaceType wspace; - GraphType *graph; - float avg, maximb, *mytpwgts; - int moptions[10]; - int seed, dbglvl = 0; - int iwgtflag, inumflag, incon, inparts, ioptions[10]; - float *itpwgts, iubvec[MAXNCON]; - - MPI_Comm_size(*comm, &npes); - MPI_Comm_rank(*comm, &mype); - - /* Deal with poor vertex distributions */ - ctrl.comm = *comm; - if (GlobalSEMin(&ctrl, vtxdist[mype+1]-vtxdist[mype]) < 1) { - if (mype == 0) - printf("Error: Poor vertex distribution (processor with no vertices).\n"); - return; - } + idx_t h, i, status, nvtxs, npes, mype, seed, dbglvl; + ctrl_t *ctrl; + graph_t *graph; + idx_t moptions[METIS_NOPTIONS]; + size_t curmem; + + /* Check the input parameters and return if an error */ + status = CheckInputsPartKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, + numflag, ncon, nparts, tpwgts, ubvec, options, edgecut, part, + comm); + if (GlobalSEMinComm(*comm, status) == 0) + return METIS_ERROR; - /********************************/ - /* Try and take care bad inputs */ - /********************************/ - if (options != NULL && options[0] == 1) - dbglvl = options[PMV3_OPTION_DBGLVL]; + status = METIS_OK; + gk_malloc_init(); + curmem = gk_GetCurMemoryUsed(); - CheckInputs(STATIC_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag, - ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, 
iubvec, NULL, - NULL, options, ioptions, part, comm); + /* Set up the control */ + ctrl = SetupCtrl(PARMETIS_OP_KMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm); + npes = ctrl->npes; + mype = ctrl->mype; - /**********************************/ /* Take care the nparts == 1 case */ - /**********************************/ - if (inparts <= 1) { - idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); + if (*nparts == 1) { + iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part); *edgecut = 0; - return; + goto DONE; } - /*******************************/ + /* Take care of npes == 1 case */ - /*******************************/ if (npes == 1) { - moptions[0] = 0; nvtxs = vtxdist[1] - vtxdist[0]; + METIS_SetDefaultOptions(moptions); + moptions[METIS_OPTION_NUMBERING] = *numflag; - if (incon == 1) { - METIS_WPartGraphKway(&nvtxs, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, - &inparts, itpwgts, moptions, edgecut, part); - } - else { - /* ADD: this is because METIS does not support tpwgts for all constraints */ - mytpwgts = fmalloc(inparts, "mytpwgts"); - for (i=0; i 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); - /*****************************/ - /* Set up control structures */ - /*****************************/ - if (ioptions[0] == 1) { - dbglvl = ioptions[PMV3_OPTION_DBGLVL]; - seed = ioptions[PMV3_OPTION_SEED]; - } - else { - dbglvl = GLOBAL_DBGLVL; - seed = GLOBAL_SEED; - } - SetUpCtrl(&ctrl, inparts, dbglvl, *comm); - ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*incon*amax(npes, inparts)); - ctrl.seed = (seed == 0) ? 
mype : seed*mype; - ctrl.sync = GlobalSEMax(&ctrl, seed); - ctrl.partType = STATIC_PARTITION; - ctrl.ps_relation = -1; - ctrl.tpwgts = itpwgts; - scopy(incon, iubvec, ctrl.ubvec); - - graph = Mc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag); - - AllocateWSpace(&ctrl, graph, &wspace); - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - - /*******************************************/ - /* Check for funny cases */ - /* -graph with no edges */ - /* -graph with self edges */ - /* -graph with less than 20*npe nodes */ - /*******************************************/ - if (vtxdist[npes] < SMALLGRAPH || - vtxdist[npes] < npes*20 || - GlobalSESum(&ctrl, graph->nedges) == 0) { - IFSET(ctrl.dbglvl, DBG_INFO, rprintf(&ctrl, "Partitioning a graph of size %d serially\n", vtxdist[npes])); - PartitionSmallGraph(&ctrl, graph, &wspace); + graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag); + + /* Setup the workspace */ + AllocateWSpace(ctrl, 10*graph->nvtxs); + + + /* Partition the graph */ + STARTTIMER(ctrl, ctrl->TotalTmr); + + ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 25*(*ncon)*gk_max(npes, *nparts)); + if (vtxdist[npes] < SMALLGRAPH + || vtxdist[npes] < npes*20 + || GlobalSESum(ctrl, graph->nedges) == 0) { /* serially */ + IFSET(ctrl->dbglvl, DBG_INFO, + rprintf(ctrl, "Partitioning a graph of size %"PRIDX" serially\n", vtxdist[npes])); + PartitionSmallGraph(ctrl, graph); } - else { - /***********************/ - /* Partition the graph */ - /***********************/ - Mc_Global_Partition(&ctrl, graph, &wspace); + else { /* in parallel */ + Global_Partition(ctrl, graph); } - ParallelReMapGraph(&ctrl, graph, &wspace); - - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); + ParallelReMapGraph(ctrl, graph); - idxcopy(graph->nvtxs, graph->where, 
part); + icopy(graph->nvtxs, graph->where, part); *edgecut = graph->mincut; - /*******************/ + STOPTIMER(ctrl, ctrl->TotalTmr); + + /* Print out stats */ - /*******************/ - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - - if (ctrl.dbglvl&DBG_INFO) { - rprintf(&ctrl, "Final %d-way CUT: %6d \tBalance: ", inparts, graph->mincut); - avg = 0.0; - for (h=0; hgnpwgts[i*incon+h]/itpwgts[i*incon+h]); - avg += maximb; - rprintf(&ctrl, "%.3f ", maximb); - } - rprintf(&ctrl, " avg: %.3f\n", avg/(float)incon); - } + IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); + IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, graph, 0)); - GKfree((void **)&itpwgts, (void **)&graph->lnpwgts, (void **)&graph->gnpwgts, - (void **)&graph->nvwgt, LTERM); - FreeInitialGraphAndRemap(graph, iwgtflag, 1); - FreeWSpace(&wspace); - FreeCtrl(&ctrl); + FreeInitialGraphAndRemap(graph); - if (inumflag == 1) + if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); + +DONE: + FreeCtrl(&ctrl); + if (gk_GetCurMemoryUsed() - curmem > 0) { + printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", + (ssize_t)(gk_GetCurMemoryUsed() - curmem)); + } + gk_malloc_cleanup(0); + + return (int)status; } -/************************************************************************* -* This function is the driver to the multi-constraint partitioning algorithm. -**************************************************************************/ -void Mc_Global_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +/*************************************************************************/ +/*! This function is the driver to the multi-constraint partitioning + algorithm. 
+*/ +/*************************************************************************/ +void Global_Partition(ctrl_t *ctrl, graph_t *graph) { - int i, ncon, nparts; - float ftmp, ubavg, lbavg, lbvec[MAXNCON]; + idx_t i, ncon, nparts; + real_t ftmp, ubavg, lbavg, *lbvec; + + WCOREPUSH; ncon = graph->ncon; nparts = ctrl->nparts; - ubavg = savg(graph->ncon, ctrl->ubvec); + ubavg = ravg(graph->ncon, ctrl->ubvec); - SetUp(ctrl, graph, wspace); + CommSetup(ctrl, graph); + + lbvec = rwspacemalloc(ctrl, ncon); if (ctrl->dbglvl&DBG_PROGRESS) { - rprintf(ctrl, "[%6d %8d %5d %5d] [%d] [", graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), + rprintf(ctrl, "[%6"PRIDX" %8"PRIDX" %5"PRIDX" %5"PRIDX"] [%"PRIDX"] [", graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo); for (i=0; invwgt[samin_strd(graph->nvtxs, graph->nvwgt+i, ncon)*ncon+i])); + rprintf(ctrl, " %.3"PRREAL"", GlobalSEMinFloat(ctrl,graph->nvwgt[rargmin_strd(graph->nvtxs, graph->nvwgt+i, ncon)*ncon+i])); rprintf(ctrl, "] ["); for (i=0; invwgt[samax_strd(graph->nvtxs, graph->nvwgt+i, ncon)*ncon+i])); + rprintf(ctrl, " %.3"PRREAL"", GlobalSEMaxFloat(ctrl, graph->nvwgt[rargmax_strd(graph->nvtxs, graph->nvwgt+i, ncon)*ncon+i])); rprintf(ctrl, "]\n"); } @@ -214,71 +161,85 @@ void Mc_Global_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) { /* Done with coarsening. 
Find a partition */ - graph->where = idxmalloc(graph->nvtxs+graph->nrecv, "graph->where"); - Mc_InitPartition_RB(ctrl, graph, wspace); + AllocateRefinementWorkSpace(ctrl, 2*graph->nedges); + graph->where = imalloc(graph->nvtxs+graph->nrecv, "graph->where"); + + InitPartition(ctrl, graph); if (ctrl->dbglvl&DBG_PROGRESS) { - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - rprintf(ctrl, "nvtxs: %10d, balance: ", graph->gnvtxs); + ComputePartitionParams(ctrl, graph); + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", + graph->gnvtxs, graph->mincut); for (i=0; incon; i++) - rprintf(ctrl, "%.3f ", lbvec[i]); + rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]); rprintf(ctrl, "\n"); + + /* free memory allocated by ComputePartitionParams */ + gk_free((void **)&graph->ckrinfo, &graph->lnpwgts, &graph->gnpwgts, LTERM); } /* In case no coarsening took place */ if (graph->finer == NULL) { - Mc_ComputePartitionParams(ctrl, graph, wspace); - Mc_KWayFM(ctrl, graph, wspace, NGR_PASSES); + ComputePartitionParams(ctrl, graph); + KWayFM(ctrl, graph, NGR_PASSES); } } else { - Match_Global(ctrl, graph, wspace); + Match_Global(ctrl, graph); + + graph_WriteToDisk(ctrl, graph); - Mc_Global_Partition(ctrl, graph->coarser, wspace); + Global_Partition(ctrl, graph->coarser); - Mc_ProjectPartition(ctrl, graph, wspace); + graph_ReadFromDisk(ctrl, graph); - Mc_ComputePartitionParams(ctrl, graph, wspace); + ProjectPartition(ctrl, graph); + + ComputePartitionParams(ctrl, graph); if (graph->ncon > 1 && graph->level < 3) { for (i=0; ignpwgts+i, ncon); + ftmp = rsum(nparts, graph->gnpwgts+i, ncon); if (ftmp != 0.0) - lbvec[i] = (float)(nparts) * - graph->gnpwgts[samax_strd(nparts, graph->gnpwgts+i, ncon)*ncon+i]/ftmp; + lbvec[i] = (real_t)(nparts) * + graph->gnpwgts[rargmax_strd(nparts, graph->gnpwgts+i, ncon)*ncon+i]/ftmp; else lbvec[i] = 1.0; } - lbavg = savg(graph->ncon, lbvec); + lbavg = ravg(graph->ncon, lbvec); if 
(lbavg > ubavg + 0.035) { if (ctrl->dbglvl&DBG_PROGRESS) { - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut); + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", + graph->gnvtxs, graph->mincut); for (i=0; incon; i++) - rprintf(ctrl, "%.3f ", lbvec[i]); - rprintf(ctrl, "\n"); + rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]); + rprintf(ctrl, " [b]\n"); } - Mc_KWayBalance(ctrl, graph, wspace, graph->ncon); + KWayBalance(ctrl, graph, graph->ncon); } } - Mc_KWayFM(ctrl, graph, wspace, NGR_PASSES); + KWayFM(ctrl, graph, NGR_PASSES); if (ctrl->dbglvl&DBG_PROGRESS) { - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut); + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", + graph->gnvtxs, graph->mincut); for (i=0; incon; i++) - rprintf(ctrl, "%.3f ", lbvec[i]); + rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]); rprintf(ctrl, "\n"); } if (graph->level != 0) - GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, LTERM); + gk_free((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, LTERM); } - return; + WCOREPOP; + } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwaybalance.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwaybalance.c deleted file mode 100644 index ffb7f5fd..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwaybalance.c +++ /dev/null @@ -1,456 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mkwaybalance.c - * - * This file contains code that performs the k-way refinement - * - * Started 3/1/96 - * George - * - * $Id: kwaybalance.c,v 1.2 2003/07/21 17:18:49 karypis Exp $ - */ - -#include - -#define ProperSide(c, from, 
other) \ - (((c) == 0 && (from)-(other) < 0) || ((c) == 1 && (from)-(other) > 0)) - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Mc_KWayBalance(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses) -{ - int h, i, ii, iii, j, k, c; - int pass, nvtxs, nedges, ncon; - int nmoves, nmoved, nswaps; -/* int gnswaps; */ - int me, firstvtx, lastvtx, yourlastvtx; - int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced; - int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; - int nlupd, nsupd, nnbrs, nchanged; - idxtype *xadj, *ladjncy, *adjwgt, *vtxdist; - idxtype *where, *tmp_where, *moved; - float *lnpwgts, *gnpwgts; - idxtype *update, *supdate, *rupdate, *pe_updates; - idxtype *changed, *perm, *pperm, *htable; - idxtype *peind, *recvptr, *sendptr; - KeyValueType *swchanges, *rwchanges; - RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo; - EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees; - float lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; - int *nupds_pe; -/* int ndirty, nclean, dptr; */ - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); - - /*************************/ - /* set up common aliases */ - /*************************/ - nvtxs = graph->nvtxs; - nedges = graph->nedges; - ncon = graph->ncon; - - vtxdist = graph->vtxdist; - xadj = graph->xadj; - ladjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - firstvtx = vtxdist[mype]; - lastvtx = vtxdist[mype+1]; - - where = graph->where; - rinfo = graph->rinfo; - lnpwgts = graph->lnpwgts; - gnpwgts = graph->gnpwgts; - ubvec = ctrl->ubvec; - tpwgts = ctrl->tpwgts; - - nnbrs = graph->nnbrs; - peind = graph->peind; - recvptr = graph->recvptr; - sendptr = graph->sendptr; - - changed = idxmalloc(nvtxs, "KWR: changed"); - rwchanges = wspace->pairs; - swchanges = rwchanges + recvptr[nnbrs]; - - 
/************************************/ - /* set up important data structures */ - /************************************/ - perm = idxmalloc(nvtxs, "KWR: perm"); - pperm = idxmalloc(nparts, "KWR: pperm"); - - update = idxmalloc(nvtxs, "KWR: update"); - supdate = wspace->indices; - rupdate = supdate + recvptr[nnbrs]; - nupds_pe = imalloc(npes, "KWR: nupds_pe"); - htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable"); - badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt"); - - for (i=0; inrecv, "KWR: tmp_where"); - tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo"); - tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees"); - - idxcopy(nvtxs+graph->nrecv, where, tmp_where); - for (i=0; imincut; - if (mype == 0) - RandomPermute(nparts, pperm, 1); - MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm); - FastRandomPermute(nvtxs, perm, 1); - - /*****************************/ - /* move dirty vertices first */ - /*****************************/ -/* - ndirty = 0; - for (i=0; iwhere, lbvec); - ubavg = savg(ncon, ubvec); - lbavg = savg(ncon, lbvec); - imbalanced = (lbavg > ubavg) ? 
1 : 0; - - for (c=0; c<2; c++) { - nmoved = 0; - - /**********************************************/ - /* PASS ONE -- record stats for desired moves */ - /**********************************************/ - for (iii=0; iiinvwgt+i*ncon; - - for (h=0; h= tmp_rinfo[i].id) { - my_edegrees = tmp_rinfo[i].degrees; - - for (k=0; k= nvtxs) - continue; - - me = ladjncy[j]; - mydomain = tmp_where[me]; - - myrinfo = tmp_rinfo+me; - your_edegrees = myrinfo->degrees; - - if (mydomain == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - } - else { - if (mydomain == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - } - } - - /* Remove contribution from the .ed of 'from' */ - if (mydomain != from) { - for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == from) { - if (your_edegrees[k].ewgt == adjwgt[j]) { - myrinfo->ndegrees--; - your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; - your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; - } - else { - your_edegrees[k].ewgt -= adjwgt[j]; - } - break; - } - } - } - - /* Add contribution to the .ed of 'to' */ - if (mydomain != to) { - for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == to) { - your_edegrees[k].ewgt += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - your_edegrees[myrinfo->ndegrees].edge = to; - your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; - } - } - } - } - } - } - } - - /*************************************************/ - /* PASS TWO -- commit the remainder of the moves */ - /*************************************************/ - nlupd = nsupd = nmoves = nchanged = 0; - for (iii=0; iiipexadj[i+1]-graph->pexadj[i] > 0) - changed[nchanged++] = i; - } - - /* Tell interested pe's the new where[] info for the interface vertices */ - CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, - swchanges, rwchanges, wspace->pv4); - - - IFSET(ctrl->dbglvl, DBG_RMOVEINFO, - rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n", - pass, c, badmaxpwgt[0], - 
GlobalSESum(ctrl, nmoves), - GlobalSESum(ctrl, nsupd), - GlobalSESum(ctrl, nlupd))); - - /*------------------------------------------------------------- - / Time to communicate with processors to send the vertices - / whose degrees need to be update. - /-------------------------------------------------------------*/ - /* Issue the receives first */ - for (i=0; icomm, ctrl->rreq+i); - } - - /* Issue the sends next. This needs some preporcessing */ - for (i=0; iimap[supdate[i]]; - } - iidxsort(nsupd, supdate); - - for (j=i=0; icomm, ctrl->sreq+i); - j = k; - } - - /* OK, now get into the loop waiting for the send/recv operations to finish */ - MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); - for (i=0; istatuses+i, IDX_DATATYPE, nupds_pe+i); - MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); - - - /*------------------------------------------------------------- - / Place the recieved to-be updated vertices into update[] - /-------------------------------------------------------------*/ - for (i=0; idegrees; - your_edegrees = tmp_myrinfo->degrees; - - graph->lmincut -= myrinfo->ed; - myrinfo->ndegrees = 0; - myrinfo->id = 0; - myrinfo->ed = 0; - - for (j=xadj[i]; jed += adjwgt[j]; - - for (k=0; kndegrees; k++) { - if (my_edegrees[k].edge == yourdomain) { - my_edegrees[k].ewgt += adjwgt[j]; - your_edegrees[k].ewgt += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - my_edegrees[k].edge = yourdomain; - my_edegrees[k].ewgt = adjwgt[j]; - your_edegrees[k].edge = yourdomain; - your_edegrees[k].ewgt = adjwgt[j]; - myrinfo->ndegrees++; - } - ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]); - ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]); - - } - else { - myrinfo->id += adjwgt[j]; - } - } - graph->lmincut += myrinfo->ed; - - tmp_myrinfo->id = myrinfo->id; - tmp_myrinfo->ed = myrinfo->ed; - tmp_myrinfo->ndegrees = myrinfo->ndegrees; - } - - /* finally, sum-up the partition weights */ - MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, - 
MPI_FLOAT, MPI_SUM, ctrl->comm); - } - graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; - - if (graph->mincut == oldcut) - break; - } - -/* - gnswaps = GlobalSESum(ctrl, nswaps); - if (mype == 0) - printf("niters: %d, nswaps: %d\n", pass+1, gnswaps); -*/ - - GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM); - GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM); - GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); -} - - diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwayfm.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwayfm.c deleted file mode 100644 index ca2376d0..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwayfm.c +++ /dev/null @@ -1,584 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * mkwayfm.c - * - * This file contains code that performs the k-way refinement - * - * Started 3/1/96 - * George - * - * $Id: kwayfm.c,v 1.3 2003/07/22 20:29:05 karypis Exp $ - */ - -#include - -#define ProperSide(c, from, other) \ - (((c) == 0 && (from)-(other) < 0) || ((c) == 1 && (from)-(other) > 0)) - -/************************************************************************* -* This function performs k-way refinement -**************************************************************************/ -void Mc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses) -{ - int h, i, ii, iii, j, k, c; - int pass, nvtxs, nedges, ncon; - int nmoves, nmoved, nswaps; - int me, firstvtx, lastvtx, yourlastvtx; - int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight; - int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; - int nlupd, nsupd, nnbrs, nchanged; - idxtype *xadj, *ladjncy, *adjwgt, *vtxdist; - idxtype *where, *tmp_where, *moved; 
- float *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill; - idxtype *update, *supdate, *rupdate, *pe_updates; - idxtype *changed, *perm, *pperm, *htable; - idxtype *peind, *recvptr, *sendptr; - KeyValueType *swchanges, *rwchanges; - RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo; - EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees; - float lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; - int *nupds_pe; - - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); - - /*************************/ - /* set up common aliases */ - /*************************/ - nvtxs = graph->nvtxs; - nedges = graph->nedges; - ncon = graph->ncon; - - vtxdist = graph->vtxdist; - xadj = graph->xadj; - ladjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - firstvtx = vtxdist[mype]; - lastvtx = vtxdist[mype+1]; - - where = graph->where; - rinfo = graph->rinfo; - lnpwgts = graph->lnpwgts; - gnpwgts = graph->gnpwgts; - ubvec = ctrl->ubvec; - tpwgts = ctrl->tpwgts; - - nnbrs = graph->nnbrs; - peind = graph->peind; - recvptr = graph->recvptr; - sendptr = graph->sendptr; - - changed = idxmalloc(nvtxs, "KWR: changed"); - rwchanges = wspace->pairs; - swchanges = rwchanges + recvptr[nnbrs]; - - /************************************/ - /* set up important data structures */ - /************************************/ - perm = idxmalloc(nvtxs, "KWR: perm"); - pperm = idxmalloc(nparts, "KWR: pperm"); - update = idxmalloc(nvtxs, "KWR: update"); - supdate = wspace->indices; - rupdate = supdate + recvptr[nnbrs]; - nupds_pe = imalloc(npes, "KWR: nupds_pe"); - htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable"); - badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt"); - - for (i=0; inrecv, "KWR: tmp_where"); - tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo"); - tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees"); - - idxcopy(nvtxs+graph->nrecv, where, tmp_where); - - for (i=0; icomm); - 
FastRandomPermute(nvtxs, perm, 1); - oldcut = graph->mincut; - - /* check to see if the partitioning is imbalanced */ - Mc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); - ubavg = savg(ncon, ubvec); - lbavg = savg(ncon, lbvec); - imbalanced = (lbavg > ubavg) ? 1 : 0; - - for (c=0; c<2; c++) { - scopy(ncon*nparts, gnpwgts, ognpwgts); - sset(ncon*nparts, 0.0, movewgts); - nmoved = 0; - - /**********************************************/ - /* PASS ONE -- record stats for desired moves */ - /**********************************************/ - for (iii=0; iiinvwgt+i*ncon; - - for (h=0; h= tmp_rinfo[i].id) { - my_edegrees = tmp_rinfo[i].degrees; - - for (k=0; k badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) - break; - } - - if (h == ncon) - break; - } - } - oldto = to; - - /* check if a subdomain was found that fits */ - if (k < tmp_rinfo[i].ndegrees) { - for (j=k+1; j badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) - break; - - if (h == ncon) { - if (my_edegrees[j].ewgt > my_edegrees[k].ewgt || - (my_edegrees[j].ewgt == my_edegrees[k].ewgt && - IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ - k = j; - oldto = my_edegrees[k].edge; - } - } - } - } - to = oldto; - - if (my_edegrees[k].ewgt > tmp_rinfo[i].id || - (my_edegrees[k].ewgt == tmp_rinfo[i].id && - (imbalanced || graph->level > 3 || iii % 8 == 0) && - IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))) { - - /****************************************/ - /* Update tmp arrays of the moved vertex */ - /****************************************/ - tmp_where[i] = to; - moved[nmoved++] = i; - for (h=0; h= nvtxs) - continue; - - me = ladjncy[j]; - mydomain = tmp_where[me]; - - myrinfo = tmp_rinfo+me; - your_edegrees = myrinfo->degrees; - - if (mydomain == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - } - else { - if (mydomain == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - } - } - - /* Remove contribution from the .ed of 'from' */ - if (mydomain != from) { - 
for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == from) { - if (your_edegrees[k].ewgt == adjwgt[j]) { - myrinfo->ndegrees--; - your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; - your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; - } - else { - your_edegrees[k].ewgt -= adjwgt[j]; - } - break; - } - } - } - - /* Add contribution to the .ed of 'to' */ - if (mydomain != to) { - for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == to) { - your_edegrees[k].ewgt += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - your_edegrees[myrinfo->ndegrees].edge = to; - your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; - } - } - } - } - } - } - } - - /******************************************/ - /* Let processors know the subdomain wgts */ - /* if all proposed moves commit. */ - /******************************************/ - MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon, - MPI_FLOAT, MPI_SUM, ctrl->comm); - - /**************************/ - /* compute overfill array */ - /**************************/ - overweight = 0; - for (j=0; j ognpwgts[j*ncon+h]) { - overfill[j*ncon+h] = (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) / - (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]); - } - else { - overfill[j*ncon+h] = 0.0; - } - - overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0); - overfill[j*ncon+h] *= movewgts[j*ncon+h]; - - if (overfill[j*ncon+h] > 0.0) - overweight = 1; - - ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] || - pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h], - (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h], - badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h])); - } - } - - /****************************************************/ - /* select moves to undo according to overfill array */ - /****************************************************/ - if (overweight == 1) { - for (iii=0; iiinvwgt+i*ncon; - my_edegrees = tmp_rinfo[i].degrees; - - for (k=0; k 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0) - break; - } - - 
/**********************************/ - /* nullify this move if necessary */ - /**********************************/ - if (k != tmp_rinfo[i].ndegrees && h != ncon) { - moved[iii] = -1; - from = oldto; - to = where[i]; - - for (h=0; h= nvtxs) - continue; - - me = ladjncy[j]; - mydomain = tmp_where[me]; - - myrinfo = tmp_rinfo+me; - your_edegrees = myrinfo->degrees; - - if (mydomain == from) { - INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); - } - else { - if (mydomain == to) { - INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); - } - } - - /* Remove contribution from the .ed of 'from' */ - if (mydomain != from) { - for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == from) { - if (your_edegrees[k].ewgt == adjwgt[j]) { - myrinfo->ndegrees--; - your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; - your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; - } - else { - your_edegrees[k].ewgt -= adjwgt[j]; - } - break; - } - } - } - - /* Add contribution to the .ed of 'to' */ - if (mydomain != to) { - for (k=0; kndegrees; k++) { - if (your_edegrees[k].edge == to) { - your_edegrees[k].ewgt += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - your_edegrees[myrinfo->ndegrees].edge = to; - your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; - } - } - } - } - } - } - - - /*************************************************/ - /* PASS TWO -- commit the remainder of the moves */ - /*************************************************/ - nlupd = nsupd = nmoves = nchanged = 0; - for (iii=0; iiipexadj[i+1]-graph->pexadj[i] > 0) - changed[nchanged++] = i; - } - - /* Tell interested pe's the new where[] info for the interface vertices */ - CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, - swchanges, rwchanges, wspace->pv4); - - - IFSET(ctrl->dbglvl, DBG_RMOVEINFO, - rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n", pass, c, badmaxpwgt[0], - GlobalSESum(ctrl, nmoves), - GlobalSESum(ctrl, nsupd), - GlobalSESum(ctrl, nlupd))); - - - 
/*------------------------------------------------------------- - / Time to communicate with processors to send the vertices - / whose degrees need to be update. - /-------------------------------------------------------------*/ - /* Issue the receives first */ - for (i=0; icomm, ctrl->rreq+i); - } - - /* Issue the sends next. This needs some preporcessing */ - for (i=0; iimap[supdate[i]]; - } - iidxsort(nsupd, supdate); - - for (j=i=0; icomm, ctrl->sreq+i); - j = k; - } - - /* OK, now get into the loop waiting for the send/recv operations to finish */ - MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); - for (i=0; istatuses+i, IDX_DATATYPE, nupds_pe+i); - MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); - - - /*------------------------------------------------------------- - / Place the recieved to-be updated vertices into update[] - /-------------------------------------------------------------*/ - for (i=0; idegrees; - your_edegrees = tmp_myrinfo->degrees; - - graph->lmincut -= myrinfo->ed; - myrinfo->ndegrees = 0; - myrinfo->id = 0; - myrinfo->ed = 0; - - for (j=xadj[i]; jed += adjwgt[j]; - - for (k=0; kndegrees; k++) { - if (my_edegrees[k].edge == yourdomain) { - my_edegrees[k].ewgt += adjwgt[j]; - your_edegrees[k].ewgt += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - my_edegrees[k].edge = yourdomain; - my_edegrees[k].ewgt = adjwgt[j]; - your_edegrees[k].edge = yourdomain; - your_edegrees[k].ewgt = adjwgt[j]; - myrinfo->ndegrees++; - } - ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]); - ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]); - - } - else { - myrinfo->id += adjwgt[j]; - } - } - graph->lmincut += myrinfo->ed; - - tmp_myrinfo->id = myrinfo->id; - tmp_myrinfo->ed = myrinfo->ed; - tmp_myrinfo->ndegrees = myrinfo->ndegrees; - } - - /* finally, sum-up the partition weights */ - MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, - MPI_FLOAT, MPI_SUM, ctrl->comm); - } - graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; 
- - if (graph->mincut == oldcut) - break; - } - - GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM); - GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM); - GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM); - GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM); - - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); -} - - diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwayrefine.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwayrefine.c index df46d0ab..b47c6650 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwayrefine.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/kwayrefine.c @@ -8,7 +8,7 @@ * Started 3/1/96 * George * - * $Id: kwayrefine.c,v 1.2 2003/07/21 17:18:49 karypis Exp $ + * $Id: kwayrefine.c 10987 2011-10-31 14:42:33Z karypis $ */ #include @@ -16,17 +16,19 @@ #define ProperSide(c, from, other) \ (((c) == 0 && (from)-(other) < 0) || ((c) == 1 && (from)-(other) > 0)) + /************************************************************************* * This function projects a partition. 
**************************************************************************/ -void Mc_ProjectPartition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void ProjectPartition(ctrl_t *ctrl, graph_t *graph) { - int i, nvtxs, nnbrs = -1, firstvtx, cfirstvtx; - idxtype *match, *cmap, *where, *cwhere; - idxtype *peind, *slens = NULL, *rlens = NULL; - KeyValueType *rcand, *scand = NULL; - GraphType *cgraph; + idx_t i, nvtxs, nnbrs = -1, firstvtx, cfirstvtx; + idx_t *match, *cmap, *where, *cwhere; + idx_t *peind, *slens = NULL, *rlens = NULL; + ikv_t *rcand, *scand = NULL; + graph_t *cgraph; + WCOREPUSH; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); @@ -37,7 +39,7 @@ void Mc_ProjectPartition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace nvtxs = graph->nvtxs; match = graph->match; cmap = graph->cmap; - where = graph->where = idxmalloc(nvtxs+graph->nrecv, "ProjectPartition: graph->where"); + where = graph->where = imalloc(nvtxs+graph->nrecv, "ProjectPartition: graph->where"); firstvtx = graph->vtxdist[ctrl->mype]; @@ -45,17 +47,19 @@ void Mc_ProjectPartition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace /*------------------------------------------------------------ / Start the transmission of the remote where information /------------------------------------------------------------*/ - scand = wspace->pairs; nnbrs = graph->nnbrs; peind = graph->peind; slens = graph->slens; rlens = graph->rlens; rcand = graph->rcand; + scand = ikvwspacemalloc(ctrl, slens[nnbrs]); + /* Issue the receives first */ for (i=0; i 0) /* Issue a receive only if you are getting something */ - MPI_Irecv((void *)(scand+slens[i]), 2*(slens[i+1]-slens[i]), IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); + gkMPI_Irecv((void *)(scand+slens[i]), 2*(slens[i+1]-slens[i]), IDX_T, + peind[i], 1, ctrl->comm, ctrl->rreq+i); } #ifdef DEBUG_PROJECT @@ -64,7 +68,7 @@ void Mc_ProjectPartition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace /* Put the 
where[rcand[].key] into the val field */ for (i=0; i= 0 && rcand[i].val < cgraph->nvtxs); + PASSERT(ctrl, rcand[i].val >= 0 && rcand[i].val < cgraph->nvtxs); rcand[i].val = cwhere[rcand[i].val]; } @@ -76,7 +80,8 @@ void Mc_ProjectPartition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace /* Issue the sends next */ for (i=0; i 0) /* Issue a send only if you are sending something */ - MPI_Isend((void *)(rcand+rlens[i]), 2*(rlens[i+1]-rlens[i]), IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); + gkMPI_Isend((void *)(rcand+rlens[i]), 2*(rlens[i+1]-rlens[i]), IDX_T, + peind[i], 1, ctrl->comm, ctrl->sreq+i); } } @@ -85,7 +90,7 @@ void Mc_ProjectPartition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace /------------------------------------------------------------*/ for (i=0; i= KEEP_BIT) { - ASSERT(ctrl, cmap[i]-cfirstvtx>=0 && cmap[i]-cfirstvtxnvtxs); + PASSERT(ctrl, cmap[i]-cfirstvtx>=0 && cmap[i]-cfirstvtxnvtxs); where[i] = cwhere[cmap[i]-cfirstvtx]; } } @@ -96,11 +101,11 @@ void Mc_ProjectPartition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace /------------------------------------------------------------*/ for (i=0; i 0) - MPI_Wait(ctrl->sreq+i, &ctrl->status); + gkMPI_Wait(ctrl->sreq+i, &ctrl->status); } for (i=0; i 0) - MPI_Wait(ctrl->rreq+i, &ctrl->status); + gkMPI_Wait(ctrl->rreq+i, &ctrl->status); } #ifdef DEBUG_PROJECT @@ -111,31 +116,32 @@ void Mc_ProjectPartition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace / Project received vertices now /------------------------------------------------------------*/ for (i=0; i=0 && scand[i].key-firstvtxnvtxs, (ctrl, "%d %d %d\n", scand[i].key, firstvtx, graph->nvtxs)); + PASSERTP(ctrl, scand[i].key-firstvtx>=0 && scand[i].key-firstvtxnvtxs, + (ctrl, "%"PRIDX" %"PRIDX" %"PRIDX"\n", scand[i].key, firstvtx, graph->nvtxs)); where[scand[i].key-firstvtx] = scand[i].val; } } - FreeGraph(graph->coarser); graph->coarser = NULL; + WCOREPOP; + IFSET(ctrl->dbglvl, DBG_TIME, 
stoptimer(ctrl->ProjectTmr)); } - /************************************************************************* * This function computes the initial id/ed **************************************************************************/ -void Mc_ComputePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void ComputePartitionParams(ctrl_t *ctrl, graph_t *graph) { - int h, i, j, k, nvtxs, ncon, firstvtx, lastvtx; - idxtype *xadj, *ladjncy, *adjwgt, *vtxdist, *where; - float *lnpwgts, *gnpwgts; - RInfoType *rinfo, *myrinfo; - EdgeType *edegrees; - int me, other; + idx_t h, i, j, k, nvtxs, ncon, firstvtx, lastvtx; + idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *where; + real_t *lnpwgts, *gnpwgts; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + idx_t me, other; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr)); @@ -143,44 +149,46 @@ void Mc_ComputePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType * ncon = graph->ncon; vtxdist = graph->vtxdist; xadj = graph->xadj; - ladjncy = graph->adjncy; + adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; - rinfo = graph->rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "CPP: rinfo"); - lnpwgts = graph->lnpwgts = fsmalloc(ctrl->nparts*ncon, 0.0, "CPP: lnpwgts"); - gnpwgts = graph->gnpwgts = fmalloc(ctrl->nparts*ncon, "CPP: gnpwgts"); + graph->ckrinfo = (ckrinfo_t *)gk_malloc(sizeof(ckrinfo_t)*nvtxs, "CPP: ckrinfo"); + memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs); + + lnpwgts = graph->lnpwgts = rsmalloc(ctrl->nparts*ncon, 0.0, "CPP: lnpwgts"); + gnpwgts = graph->gnpwgts = rmalloc(ctrl->nparts*ncon, "CPP: gnpwgts"); firstvtx = vtxdist[ctrl->mype]; lastvtx = vtxdist[ctrl->mype+1]; - /*------------------------------------------------------------ - / Send/Receive the where information of interface vertices - /------------------------------------------------------------*/ - CommInterfaceData(ctrl, graph, where, wspace->indices, where+nvtxs); + /* Send/Receive the where information 
of interface vertices */ + CommInterfaceData(ctrl, graph, where, where+nvtxs); #ifdef DEBUG_COMPUTEPPARAM - PrintVector(ctrl, nvtxs, firstvtx, where, "where"); + rprintf(ctrl, "where sum: %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", + GlobalSESum(ctrl, nvtxs+graph->nrecv), + GlobalSESum(ctrl, isum(nvtxs+graph->nrecv, where, 1)), + GlobalSESum(ctrl, isum(nvtxs+1, xadj, 1)), + GlobalSESum(ctrl, isum(graph->nedges, adjncy, 1)), + GlobalSESum(ctrl, isum(graph->nedges, adjwgt, 1)) + ); + + PrintVector(ctrl, nvtxs+graph->nrecv, 0, where, "where"); #endif - /*------------------------------------------------------------ - / Compute now the id/ed degrees - /------------------------------------------------------------*/ - ASSERT(ctrl, wspace->nlarge >= xadj[nvtxs]); + /* Compute now the id/ed degrees */ graph->lmincut = 0; for (i=0; ickrinfo+i; for (h=0; hnvwgt[i*ncon+h]; - myrinfo->degrees = wspace->degrees + xadj[i]; - myrinfo->ndegrees = myrinfo->id = myrinfo->ed = 0; - for (j=xadj[i]; jid += adjwgt[j]; else myrinfo->ed += adjwgt[j]; @@ -189,26 +197,31 @@ void Mc_ComputePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType * if (myrinfo->ed > 0) { /* Time to do some serious work */ graph->lmincut += myrinfo->ed; - edegrees = myrinfo->degrees; + + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; for (j=xadj[i]; jndegrees; k++) { - if (edegrees[k].edge == other) { - edegrees[k].ewgt += adjwgt[j]; + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == other) { + mynbrs[k].ed += adjwgt[j]; break; } } - if (k == myrinfo->ndegrees) { - edegrees[k].edge = other; - edegrees[k].ewgt = adjwgt[j]; - myrinfo->ndegrees++; + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = other; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; } - ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]); + PASSERT(ctrl, myrinfo->nnbrs <= xadj[i+1]-xadj[i]); } } } + else { + myrinfo->inbr = -1; + } } #ifdef DEBUG_COMPUTEPPARAM @@ -216,7 +229,8 @@ void 
Mc_ComputePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType * #endif /* Finally, sum-up the partition weights */ - MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, ctrl->nparts*ncon, MPI_FLOAT, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, ctrl->nparts*ncon, + REAL_T, MPI_SUM, ctrl->comm); graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; @@ -227,3 +241,945 @@ void Mc_ComputePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType * IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr)); } + +/************************************************************************* +* This function performs k-way refinement +**************************************************************************/ +void KWayFM(ctrl_t *ctrl, graph_t *graph, idx_t npasses) +{ + idx_t from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight; + idx_t h, i, ii, iii, j, k, c; + idx_t pass, nvtxs, nedges, ncon; + idx_t nmoves, nmoved, nswaps; + idx_t me, firstvtx, lastvtx, yourlastvtx; + idx_t npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; + idx_t nlupd, nsupd, nnbrs, nchanged; + idx_t *xadj, *adjncy, *adjwgt, *vtxdist; + idx_t *where, *tmp_where, *moved, *oldEDs; + real_t *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill; + idx_t *update, *supdate, *rupdate, *pe_updates; + idx_t *changed, *perm, *pperm, *htable; + idx_t *peind, *recvptr, *sendptr; + ikv_t *swchanges, *rwchanges; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + real_t *lbvec, *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; + idx_t *nupds_pe; + + IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); + WCOREPUSH; + + /*************************/ + /* set up common aliases */ + /*************************/ + nvtxs = graph->nvtxs; + nedges = graph->nedges; + ncon = graph->ncon; + + vtxdist = graph->vtxdist; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + firstvtx = vtxdist[mype]; + lastvtx = vtxdist[mype+1]; + + 
where = graph->where; + lnpwgts = graph->lnpwgts; + gnpwgts = graph->gnpwgts; + + ubvec = ctrl->ubvec; + tpwgts = ctrl->tpwgts; + + nnbrs = graph->nnbrs; + peind = graph->peind; + recvptr = graph->recvptr; + sendptr = graph->sendptr; + + /************************************/ + /* set up important data structures */ + /************************************/ + lbvec = rwspacemalloc(ctrl, ncon); + badmaxpwgt = rwspacemalloc(ctrl, nparts*ncon); + movewgts = rwspacemalloc(ctrl, nparts*ncon); + ognpwgts = rwspacemalloc(ctrl, nparts*ncon); + pgnpwgts = rwspacemalloc(ctrl, nparts*ncon); + overfill = rwspacemalloc(ctrl, nparts*ncon); + + pperm = iwspacemalloc(ctrl, nparts); + nupds_pe = iwspacemalloc(ctrl, npes); + + oldEDs = iwspacemalloc(ctrl, nvtxs); + changed = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + update = iwspacemalloc(ctrl, nvtxs); + moved = iwspacemalloc(ctrl, nvtxs); + htable = iset(nvtxs+graph->nrecv, 0, iwspacemalloc(ctrl, nvtxs+graph->nrecv)); + + rwchanges = ikvwspacemalloc(ctrl, graph->nrecv); + swchanges = ikvwspacemalloc(ctrl, graph->nsend); + supdate = iwspacemalloc(ctrl, graph->nrecv); + rupdate = iwspacemalloc(ctrl, graph->nsend); + + tmp_where = iwspacemalloc(ctrl, nvtxs+graph->nrecv); + + for (i=0; inrecv, where, tmp_where); + + /* this will record the overall external degrees of the vertices + prior to a inner refinement iteration in order to allow for + the proper updating of the lmincut */ + for (i=0; ickrinfo[i].ed; + + + /*********************************************************/ + /* perform a small number of passes through the vertices */ + /*********************************************************/ + for (nswaps=0, pass=0; passcomm); + FastRandomPermute(nvtxs, perm, 1); + oldcut = graph->mincut; + + /* check to see if the partitioning is imbalanced */ + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + ubavg = ravg(ncon, ubvec); + lbavg = ravg(ncon, lbvec); + imbalanced = (lbavg > ubavg) ? 
1 : 0; + + for (c=0; c<2; c++) { + rcopy(ncon*nparts, gnpwgts, ognpwgts); + rset(ncon*nparts, 0.0, movewgts); + nmoved = 0; + + /**********************************************/ + /* PASS ONE -- record stats for desired moves */ + /**********************************************/ + for (iii=0; iiinvwgt+i*ncon; + + for (h=0; hckrinfo + i; + if (myrinfo->ed == 0 || myrinfo->ed < myrinfo->id) + continue; + + PASSERT(ctrl, myrinfo->inbr != -1); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + for (k=myrinfo->nnbrs-1; k>=0; k--) { + to = mynbrs[k].pid; + if (ProperSide(c, pperm[from], pperm[to])) { + for (h=0; h badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) + break; + } + + if (h == ncon) + break; + } + } + + /* break out if you did not find a candidate */ + if (k < 0) + continue; + + oldto = to; + for (j=k-1; j>=0; j--) { + to = mynbrs[j].pid; + if (ProperSide(c, pperm[from], pperm[to])) { + for (h=0; h badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) + break; + } + + if (h == ncon) { + if (mynbrs[j].ed > mynbrs[k].ed || + (mynbrs[j].ed == mynbrs[k].ed && + IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ + oldto = to; + k = j; + } + } + } + } + to = oldto; + + if (mynbrs[k].ed > myrinfo->id || + (mynbrs[k].ed == myrinfo->id && + (imbalanced || graph->level > 3 || iii % 8 == 0) && + IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))) { + + /****************************************/ + /* Update tmp arrays of the moved vertex */ + /****************************************/ + tmp_where[i] = to; + moved[nmoved++] = i; + for (h=0; hed += myrinfo->id-mynbrs[k].ed; + gk_SWAP(myrinfo->id, mynbrs[k].ed, j); + if (mynbrs[k].ed == 0) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].pid = from; + + /* Update the degrees of adjacent vertices */ + for (j=xadj[i]; j= nvtxs) + continue; + + me = adjncy[j]; + mydomain = tmp_where[me]; + + myrinfo = graph->ckrinfo+me; + if (myrinfo->inbr == -1) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, 
xadj[me+1]-xadj[me]); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + if (mydomain == from) { + INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); + } + else { + if (mydomain == to) { + INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); + } + } + + /* Remove contribution from the .ed of 'from' */ + if (mydomain != from) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == from) { + if (mynbrs[k].ed == adjwgt[j]) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].ed -= adjwgt[j]; + break; + } + } + } + + /* Add contribution to the .ed of 'to' */ + if (mydomain != to) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) { + mynbrs[k].ed += adjwgt[j]; + break; + } + } + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = to; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; + } + } + } + } + } + + /******************************************/ + /* Let processors know the subdomain wgts */ + /* if all proposed moves commit. */ + /******************************************/ + gkMPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon, REAL_T, + MPI_SUM, ctrl->comm); + + /**************************/ + /* compute overfill array */ + /**************************/ + overweight = 0; + for (j=0; j ognpwgts[j*ncon+h]) { + overfill[j*ncon+h] = (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) / + (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]); + } + else { + overfill[j*ncon+h] = 0.0; + } + + overfill[j*ncon+h] = gk_max(overfill[j*ncon+h], 0.0); + overfill[j*ncon+h] *= movewgts[j*ncon+h]; + + if (overfill[j*ncon+h] > 0.0) + overweight = 1; + + PASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] || + pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h], + (ctrl, "%.4"PRREAL" %.4"PRREAL" %.4"PRREAL"\n", ognpwgts[j*ncon+h], + badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h])); + } + } + + /****************************************************/ + /* select moves to undo according to overfill array */ + /****************************************************/ + if (overweight == 1) { + for (iii=0; 
iiinvwgt+i*ncon; + + myrinfo = graph->ckrinfo + i; + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + PASSERT(ctrl, myrinfo->nnbrs == 0 || myrinfo->inbr != -1); + + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == where[i]) + break; + } + + for (h=0; h 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0) + break; + } + + /**********************************/ + /* nullify this move if necessary */ + /**********************************/ + if (k != myrinfo->nnbrs && h != ncon) { + moved[iii] = -1; + from = oldto; + to = where[i]; + + for (h=0; hed += myrinfo->id-mynbrs[k].ed; + gk_SWAP(myrinfo->id, mynbrs[k].ed, j); + if (mynbrs[k].ed == 0) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].pid = from; + + for (h=0; h= nvtxs) + continue; + + me = adjncy[j]; + mydomain = tmp_where[me]; + + myrinfo = graph->ckrinfo+me; + if (myrinfo->inbr == -1) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[me+1]-xadj[me]); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + if (mydomain == from) { + INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); + } + else { + if (mydomain == to) { + INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); + } + } + + /* Remove contribution from the .ed of 'from' */ + if (mydomain != from) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == from) { + if (mynbrs[k].ed == adjwgt[j]) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].ed -= adjwgt[j]; + break; + } + } + } + + /* Add contribution to the .ed of 'to' */ + if (mydomain != to) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) { + mynbrs[k].ed += adjwgt[j]; + break; + } + } + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = to; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; + } + } + } + } + } + } + + + /*************************************************/ + /* PASS TWO -- commit the remainder of the moves */ + /*************************************************/ + nlupd = nsupd = nmoves = nchanged = 0; + for (iii=0; iiipexadj[i+1]-graph->pexadj[i] > 0) + changed[nchanged++] = i; + } 
+ + /* Tell interested pe's the new where[] info for the interface vertices */ + CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, + swchanges, rwchanges); + + + IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, + "\t[%"PRIDX" %"PRIDX"], [%.4"PRREAL"], [%"PRIDX" %"PRIDX" %"PRIDX"]\n", + pass, c, badmaxpwgt[0], GlobalSESum(ctrl, nmoves), + GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); + + + /*------------------------------------------------------------- + / Time to communicate with processors to send the vertices + / whose degrees need to be update. + /-------------------------------------------------------------*/ + /* Issue the receives first */ + for (i=0; icomm, ctrl->rreq+i); + } + + /* Issue the sends next. This needs some preporcessing */ + for (i=0; iimap[supdate[i]]; + } + isorti(nsupd, supdate); + + for (j=i=0; icomm, ctrl->sreq+i); + j = k; + } + + /* OK, now get into the loop waiting for the send/recv operations to finish */ + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + for (i=0; istatuses+i, IDX_T, nupds_pe+i); + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + + + /*------------------------------------------------------------- + / Place the recieved to-be updated vertices into update[] + /-------------------------------------------------------------*/ + for (i=0; ickrinfo+i; + + if (myrinfo->inbr == -1) + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + graph->lmincut -= oldEDs[i]; + myrinfo->nnbrs = 0; + myrinfo->id = 0; + myrinfo->ed = 0; + + for (j=xadj[i]; jed += adjwgt[j]; + + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == yourdomain) { + mynbrs[k].ed += adjwgt[j]; + break; + } + } + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = yourdomain; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; + } + PASSERT(ctrl, myrinfo->nnbrs <= xadj[i+1]-xadj[i]); + } + else { + myrinfo->id += adjwgt[j]; + } + } + graph->lmincut += myrinfo->ed; + oldEDs[i] = myrinfo->ed; /* for 
the next iteration */ + } + + /* finally, sum-up the partition weights */ + gkMPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, + REAL_T, MPI_SUM, ctrl->comm); + } + graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; + + IFSET(ctrl->dbglvl, DBG_RMOVEINFO, + rprintf(ctrl, "\t\tcut: %"PRIDX"\n", graph->mincut)); + + if (graph->mincut == oldcut) + break; + } + + WCOREPOP; + IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); +} + + +/************************************************************************* +* This function performs k-way refinement +**************************************************************************/ +void KWayBalance(ctrl_t *ctrl, graph_t *graph, idx_t npasses) +{ + idx_t h, i, ii, iii, j, k, c; + idx_t pass, nvtxs, nedges, ncon; + idx_t nmoves, nmoved, nswaps; + idx_t me, firstvtx, lastvtx, yourlastvtx; + idx_t from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced; + idx_t npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; + idx_t nlupd, nsupd, nnbrs, nchanged; + idx_t *xadj, *adjncy, *adjwgt, *vtxdist; + idx_t *where, *tmp_where, *moved, *oldEDs; + real_t *lnpwgts, *gnpwgts; + idx_t *update, *supdate, *rupdate, *pe_updates; + idx_t *changed, *perm, *pperm, *htable; + idx_t *peind, *recvptr, *sendptr; + ikv_t *swchanges, *rwchanges; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + real_t *lbvec, *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; + idx_t *nupds_pe; + + IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); + WCOREPUSH; + + /*************************/ + /* set up common aliases */ + /*************************/ + nvtxs = graph->nvtxs; + nedges = graph->nedges; + ncon = graph->ncon; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + where = graph->where; + lnpwgts = graph->lnpwgts; + gnpwgts = graph->gnpwgts; + + nnbrs = graph->nnbrs; + peind = graph->peind; + recvptr = graph->recvptr; + sendptr = graph->sendptr; + + vtxdist = graph->vtxdist; + firstvtx = vtxdist[mype]; + 
lastvtx = vtxdist[mype+1]; + + ubvec = ctrl->ubvec; + tpwgts = ctrl->tpwgts; + + + /************************************/ + /* set up important data structures */ + /************************************/ + lbvec = rwspacemalloc(ctrl, ncon); + badmaxpwgt = rwspacemalloc(ctrl, nparts*ncon); + + pperm = iwspacemalloc(ctrl, nparts); + nupds_pe = iwspacemalloc(ctrl, npes); + + oldEDs = iwspacemalloc(ctrl, nvtxs); + changed = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + update = iwspacemalloc(ctrl, nvtxs); + moved = iwspacemalloc(ctrl, nvtxs); + htable = iset(nvtxs+graph->nrecv, 0, iwspacemalloc(ctrl, nvtxs+graph->nrecv)); + + rwchanges = ikvwspacemalloc(ctrl, graph->nrecv); + swchanges = ikvwspacemalloc(ctrl, graph->nsend); + supdate = iwspacemalloc(ctrl, graph->nrecv); + rupdate = iwspacemalloc(ctrl, graph->nsend); + + tmp_where = iwspacemalloc(ctrl, nvtxs+graph->nrecv); + + for (i=0; inrecv, where, tmp_where); + + /* this will record the overall external degrees of the vertices + prior to a inner refinement iteration in order to allow for + the proper updating of the lmincut */ + for (i=0; ickrinfo[i].ed; + + + /*********************************************************/ + /* perform a small number of passes through the vertices */ + /*********************************************************/ + for (nswaps=0, pass=0; passmincut; + if (mype == 0) + RandomPermute(nparts, pperm, 1); + gkMPI_Bcast((void *)pperm, nparts, IDX_T, 0, ctrl->comm); + FastRandomPermute(nvtxs, perm, 1); + + /* check to see if the partitioning is imbalanced */ + ComputeParallelBalance(ctrl, graph, graph->where, lbvec); + ubavg = ravg(ncon, ubvec); + lbavg = ravg(ncon, lbvec); + imbalanced = (lbavg > ubavg) ? 
1 : 0; + + for (c=0; c<2; c++) { + nmoved = 0; + + /**********************************************/ + /* PASS ONE -- record stats for desired moves */ + /**********************************************/ + for (iii=0; iiinvwgt+i*ncon; + + for (h=0; hckrinfo+i; + if (myrinfo->ed == 0 || myrinfo->ed < myrinfo->id) + continue; + + PASSERT(ctrl, myrinfo->inbr != -1); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + for (k=myrinfo->nnbrs-1; k>=0; k--) { + to = mynbrs[k].pid; + if (ProperSide(c, pperm[from], pperm[to]) && + IsHBalanceBetterFT(ncon, gnpwgts+from*ncon, gnpwgts+to*ncon, nvwgt, ubvec)) { + break; + } + } + + /* break out if you did not find a candidate */ + if (k < 0) + continue; + + oldto = to; + for (j=k-1; j>=0; j--) { + to = mynbrs[j].pid; + if (ProperSide(c, pperm[from], pperm[to]) && + IsHBalanceBetterTT(ncon, gnpwgts+oldto*ncon, gnpwgts+to*ncon, nvwgt, ubvec)){ + oldto = to; + k = j; + } + } + to = oldto; + + if (iii % npes == 0) { + /****************************************/ + /* Update tmp arrays of the moved vertex */ + /****************************************/ + tmp_where[i] = to; + moved[nmoved++] = i; + for (h=0; hed += myrinfo->id-mynbrs[k].ed; + gk_SWAP(myrinfo->id, mynbrs[k].ed, j); + if (mynbrs[k].ed == 0) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].pid = from; + + /* Update the degrees of adjacent vertices */ + for (j=xadj[i]; j= nvtxs) + continue; + + me = adjncy[j]; + mydomain = tmp_where[me]; + + myrinfo = graph->ckrinfo+me; + if (myrinfo->inbr == -1) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[me+1]-xadj[me]); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + if (mydomain == from) { + INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); + } + else { + if (mydomain == to) { + INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); + } + } + + /* Remove contribution from the .ed of 'from' */ + if (mydomain != from) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == from) { + if (mynbrs[k].ed == adjwgt[j]) + mynbrs[k] 
= mynbrs[--myrinfo->nnbrs]; + else + mynbrs[k].ed -= adjwgt[j]; + break; + } + } + } + + /* Add contribution to the .ed of 'to' */ + if (mydomain != to) { + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) { + mynbrs[k].ed += adjwgt[j]; + break; + } + } + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = to; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; + } + } + } + } + } + + /*************************************************/ + /* PASS TWO -- commit the remainder of the moves */ + /*************************************************/ + nlupd = nsupd = nmoves = nchanged = 0; + for (iii=0; iiipexadj[i+1]-graph->pexadj[i] > 0) + changed[nchanged++] = i; + } + + /* Tell interested pe's the new where[] info for the interface vertices */ + CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, + rwchanges); + + IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, + "\t[%"PRIDX" %"PRIDX"], [%.4"PRREAL"], [%"PRIDX" %"PRIDX" %"PRIDX"]\n", + pass, c, badmaxpwgt[0], GlobalSESum(ctrl, nmoves), + GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); + + /*------------------------------------------------------------- + / Time to communicate with processors to send the vertices + / whose degrees need to be update. + /-------------------------------------------------------------*/ + /* Issue the receives first */ + for (i=0; icomm, ctrl->rreq+i); + } + + /* Issue the sends next. 
This needs some preporcessing */ + for (i=0; iimap[supdate[i]]; + } + isorti(nsupd, supdate); + + for (j=i=0; icomm, ctrl->sreq+i); + j = k; + } + + /* OK, now get into the loop waiting for the send/recv operations to finish */ + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + for (i=0; istatuses+i, IDX_T, nupds_pe+i); + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + + + /*------------------------------------------------------------- + / Place the recieved to-be updated vertices into update[] + /-------------------------------------------------------------*/ + for (i=0; ickrinfo+i; + if (myrinfo->inbr == -1) + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + graph->lmincut -= oldEDs[i]; + myrinfo->nnbrs = 0; + myrinfo->id = 0; + myrinfo->ed = 0; + + for (j=xadj[i]; jed += adjwgt[j]; + + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == yourdomain) { + mynbrs[k].ed += adjwgt[j]; + break; + } + } + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = yourdomain; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; + } + PASSERT(ctrl, myrinfo->nnbrs <= xadj[i+1]-xadj[i]); + } + else { + myrinfo->id += adjwgt[j]; + } + } + graph->lmincut += myrinfo->ed; + oldEDs[i] = myrinfo->ed; /* for the next iteration */ + } + + /* finally, sum-up the partition weights */ + gkMPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, + REAL_T, MPI_SUM, ctrl->comm); + } + graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; + + if (graph->mincut == oldcut) + break; + } + +/* + gnswaps = GlobalSESum(ctrl, nswaps); + if (mype == 0) + printf("niters: %"PRIDX", nswaps: %"PRIDX"\n", pass+1, gnswaps); +*/ + + WCOREPOP; + IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); +} + + diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/macros.h b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/macros.h index 2e42ee68..98a7f5d0 100644 --- 
a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/macros.h +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/macros.h @@ -8,106 +8,51 @@ * Started 9/25/94 * George * - * $Id: macros.h,v 1.8 2003/07/21 19:11:46 karypis Exp $ + * $Id: macros.h 10578 2011-07-14 18:10:15Z karypis $ * */ -/************************************************************************* -* The following macro returns a random number in the specified range -**************************************************************************/ -#define RandomInRange(u) ((int)(1.0*(u)*rand()/(RAND_MAX+1.0))) -/* #define RandomInRange(u) (rand()%(u)) */ - -#define amax(a, b) ((a) >= (b) ? (a) : (b)) -#define amin(a, b) ((a) >= (b) ? (b) : (a)) - +/* The following macro returns a random number in the specified range */ #define AND(a, b) ((a) < 0 ? ((-(a))&(b)) : ((a)&(b))) #define OR(a, b) ((a) < 0 ? -((-(a))|(b)) : ((a)|(b))) #define XOR(a, b) ((a) < 0 ? -((-(a))^(b)) : ((a)^(b))) -#define SWAP(a, b, tmp) \ - do {(tmp) = (a); (a) = (b); (b) = (tmp);} while(0) - -#define INC_DEC(a, b, val) \ - do {(a) += (val); (b) -= (val);} while(0) - - -#define icopy(n, a, b) memcpy((b), (a), sizeof(int)*(n)) -#define scopy(n, a, b) memcpy((b), (a), sizeof(float)*(n)) -#define idxcopy(n, a, b) memcpy((b), (a), sizeof(idxtype)*(n)) - #define HASHFCT(key, size) ((key)%(size)) +/* set/reset the current workspace core */ +#define WCOREPUSH do {PASSERT(ctrl,ctrl->mcore!=NULL); gk_mcorePush(ctrl->mcore);}while(0) +#define WCOREPOP do {PASSERT(ctrl,ctrl->mcore!=NULL); gk_mcorePop(ctrl->mcore);}while(0) -/************************************************************************* -* Timer macros -**************************************************************************/ + +/* Timer macros */ #define cleartimer(tmr) (tmr = 0.0) #define starttimer(tmr) (tmr -= MPI_Wtime()) #define stoptimer(tmr) (tmr += MPI_Wtime()) #define gettimer(tmr) (tmr) - - 
-/************************************************************************* -* This macro is used to handle dbglvl -**************************************************************************/ -#define IFSET(a, flag, cmd) if ((a)&(flag)) (cmd); - -/************************************************************************* -* These macros are used for debuging memory leaks -**************************************************************************/ -#ifdef DMALLOC -#define imalloc(n, msg) (malloc(sizeof(int)*(n))) -#define fmalloc(n, msg) (malloc(sizeof(float)*(n))) -#define idxmalloc(n, msg) (malloc(sizeof(idxtype)*(n))) -#define ismalloc(n, val, msg) (iset((n), (val), malloc(sizeof(int)*(n)))) -#define idxsmalloc(n, val, msg) (idxset((n), (val), malloc(sizeof(idxtype)*(n)))) -#define GKmalloc(a, b) (malloc(a)) -#endif - -#ifdef DMALLOC -# define MALLOC_CHECK(ptr); -/* -# define MALLOC_CHECK(ptr) \ - if (malloc_verify((ptr)) == DMALLOC_VERIFY_ERROR) { \ - printf("***MALLOC_CHECK failed on line %d of file %s: " #ptr "\n", \ - __LINE__, __FILE__); \ - abort(); \ - } -*/ -#else -# define MALLOC_CHECK(ptr) ; -#endif - -/************************************************************************* -* This macro converts a length array in a CSR one -**************************************************************************/ -#define MAKECSR(i, n, a) \ +#define STARTTIMER(ctrl, tmr) \ do { \ - for (i=1; i0; i--) a[i] = a[i-1]; \ - a[0] = 0; \ - } while(0) - - -#define SHIFTCSR(i, n, a) \ + IFSET((ctrl)->dbglvl, DBG_TIME, gkMPI_Barrier((ctrl)->gcomm));\ + IFSET((ctrl)->dbglvl, DBG_TIME, starttimer((tmr))); \ + } while (0) +#define STOPTIMER(ctrl, tmr) \ do { \ - for (i=n; i>0; i--) a[i] = a[i-1]; \ - a[0] = 0; \ - } while(0) + IFSET((ctrl)->dbglvl, DBG_TIME, gkMPI_Barrier((ctrl)->gcomm));\ + IFSET((ctrl)->dbglvl, DBG_TIME, stoptimer((tmr))); \ + } while (0) +/* Debugging macros */ #ifndef NDEBUG -# define ASSERT(ctrl, expr) \ +# define PASSERT(ctrl, expr) \ if (!(expr)) { \ 
myprintf(ctrl, "***ASSERTION failed on line %d of file %s: " #expr "\n", \ __LINE__, __FILE__); \ assert(expr); \ } -# define ASSERTP(ctrl, expr, msg) \ +# define PASSERTP(ctrl, expr, msg) \ if (!(expr)) { \ myprintf(ctrl, "***ASSERTION failed on line %d of file %s:" #expr "\n", \ __LINE__, __FILE__); \ @@ -115,26 +60,9 @@ assert(expr); \ } -# define ASSERTS(expr) \ - if (!(expr)) { \ - printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ - __LINE__, __FILE__); \ - assert(expr); \ - } - -# define ASSERTSP(expr, msg) \ - if (!(expr)) { \ - printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ - __LINE__, __FILE__); \ - printf msg ; \ - assert(expr); \ - } - #else -# define ASSERT(ctrl, expr) ; -# define ASSERTP(ctrl, expr,msg) ; -# define ASSERTS(expr) ; -# define ASSERTSP(expr, msg) ; +# define PASSERT(ctrl, expr) ; +# define PASSERTP(ctrl, expr,msg) ; #endif @@ -143,15 +71,15 @@ * **************************************************************************/ #define BNDInsert(nbnd, bndind, bndptr, vtx) \ do { \ - bndind[nbnd] = vtx; \ - bndptr[vtx] = nbnd++;\ - } while(0) + bndind[nbnd] = vtx; \ + bndptr[vtx] = nbnd++;\ + } while(0) #define BNDDelete(nbnd, bndind, bndptr, vtx) \ do { \ - bndind[bndptr[vtx]] = bndind[--nbnd]; \ - bndptr[bndind[nbnd]] = bndptr[vtx]; \ - bndptr[vtx] = -1; \ - } while(0) + bndind[bndptr[vtx]] = bndind[--nbnd]; \ + bndptr[bndind[nbnd]] = bndptr[vtx]; \ + bndptr[vtx] = -1; \ + } while(0) diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/match.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/match.c index 6dc0e3aa..568d2b84 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/match.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/match.c @@ -8,7 +8,7 @@ * Started 2/22/96 * George * - * $Id: match.c,v 1.2 2003/07/21 17:18:50 karypis Exp $ + * $Id: match.c 10592 2011-07-16 21:17:53Z karypis $ * */ @@ 
-17,36 +17,37 @@ #define LHTSIZE 8192 /* This should be a power of two */ #define MASK 8191 /* This should be equal to LHTSIZE-1 */ + /*************************************************************************/ /*! Finds a HEM matching involving both local and remote vertices */ /*************************************************************************/ -void Match_Global(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void Match_Global0(ctrl_t *ctrl, graph_t *graph) { - int h, i, ii, j, k; - int nnbrs, nvtxs, ncon, cnvtxs, firstvtx, lastvtx, maxi, maxidx, nkept; - int otherlastvtx, nrequests, nchanged, pass, nmatched, wside; - idxtype *xadj, *ladjncy, *adjwgt, *vtxdist, *home, *myhome, *shome, *rhome; - idxtype *match, *rmatch, *smatch; - idxtype *peind, *sendptr, *recvptr; - idxtype *perm, *iperm, *nperm, *changed; - float *nvwgt, maxnvwgt; - int *nreqs_pe; - KeyValueType *match_requests, *match_granted, *pe_requests; - int last_unmatched; - - ASSERT(ctrl, wspace->nlarge > graph->xadj[graph->nvtxs]); - - maxnvwgt = 0.75/((float)(ctrl->CoarsenTo)); + idx_t h, i, ii, j, jj, k, v; + idx_t nnbrs, nvtxs, ncon, cnvtxs, firstvtx, lastvtx, maxi, maxidx, nkept; + idx_t otherlastvtx, nrequests, nchanged, pass, nmatched, wside; + idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *home, *myhome; + idx_t *match; + idx_t *peind, *sendptr, *recvptr; + idx_t *perm, *iperm, *nperm, *changed; + real_t *nvwgt, maxnvwgt; + idx_t *nreqs_pe; + ikv_t *match_requests, *match_granted, *pe_requests; + idx_t last_unmatched; + + WCOREPUSH; + + maxnvwgt = 0.75/((real_t)(ctrl->CoarsenTo)); graph->match_type = PARMETIS_MTYPE_GLOBAL; - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; - ladjncy = graph->adjncy; + adjncy = graph->adjncy; adjwgt = graph->adjwgt; home = graph->home; nvwgt = graph->nvwgt; @@ -55,65 
+56,395 @@ void Match_Global(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) firstvtx = vtxdist[ctrl->mype]; lastvtx = vtxdist[ctrl->mype+1]; - match = graph->match = idxsmalloc(nvtxs+graph->nrecv, UNMATCHED, "GlobalMatch: match"); - myhome = idxsmalloc(nvtxs+graph->nrecv, UNMATCHED, "GlobalMatch: myhome"); + nnbrs = graph->nnbrs; + peind = graph->peind; + sendptr = graph->sendptr; + recvptr = graph->recvptr; + + match = graph->match = ismalloc(nvtxs+graph->nrecv, UNMATCHED, "GlobalMatch: match"); + + /* wspacemalloc'ed arrays */ + myhome = iset(nvtxs+graph->nrecv, UNMATCHED, iwspacemalloc(ctrl, nvtxs+graph->nrecv)); + nreqs_pe = iset(nnbrs, 0, iwspacemalloc(ctrl, nnbrs)); + perm = iwspacemalloc(ctrl, nvtxs); + iperm = iwspacemalloc(ctrl, nvtxs); + nperm = iwspacemalloc(ctrl, nnbrs); + changed = iwspacemalloc(ctrl, nvtxs); - /*------------------------------------------------------------ - / Send/Receive the home information of interface vertices - /------------------------------------------------------------*/ + match_requests = ikvwspacemalloc(ctrl, graph->nsend); + match_granted = ikvwspacemalloc(ctrl, graph->nrecv); + + + /* create the traversal order */ + FastRandomPermute(nvtxs, perm, 1); + for (i=0; ipartType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) { - ASSERT(ctrl, home != NULL); - idxcopy(nvtxs, home, myhome); - shome = wspace->indices; - rhome = myhome + nvtxs; - CommInterfaceData(ctrl, graph, myhome, shome, rhome); + PASSERT(ctrl, home != NULL); + icopy(nvtxs, home, myhome); + CommInterfaceData(ctrl, graph, myhome, myhome+nvtxs); } - /* If coarsening for ordering, replace home with where information */ + /* if coarsening for ordering, replace home with where information */ if (ctrl->partType == ORDER_PARTITION) { - ASSERT(ctrl, graph->where != NULL); - idxcopy(nvtxs, graph->where, myhome); - shome = wspace->indices; - rhome = myhome + nvtxs; - CommInterfaceData(ctrl, graph, myhome, shome, rhome); + PASSERT(ctrl, graph->where 
!= NULL); + icopy(nvtxs, graph->where, myhome); + CommInterfaceData(ctrl, graph, myhome, myhome+nvtxs); } + + /* mark all heavy vertices as TOO_HEAVY so they will not be matched */ + for (nchanged=i=0; i maxnvwgt) { + match[i] = TOO_HEAVY; + nchanged++; + break; + } + } + } + + /* If no heavy vertices, pick one at random and treat it as such so that + at the end of the matching each partition will still have one vertex. + This is to eliminate the cases in which once a matching has been + computed, a processor ends up having no vertices */ + if (nchanged == 0) + match[RandomInRange(nvtxs)] = TOO_HEAVY; + + CommInterfaceData(ctrl, graph, match, match+nvtxs); + + + /* set initial value of nkept based on how over/under weight the + partition is to begin with */ + nkept = graph->gnvtxs/ctrl->npes - nvtxs; + + + /* Find a matching by doing multiple iterations */ + for (nmatched=pass=0; passlevel+pass)%2; + nchanged = nrequests = 0; + for (last_unmatched=ii=nmatched; ii k ? KEEP_BIT : 0); + changed[nchanged++] = i; + changed[nchanged++] = k; + break; + } + } + continue; + } + + /* Find a heavy-edge matching */ + for (j=xadj[i]; j= 0)) { + maxi = j; + maxidx = k; + } + } + } + } + + /* two-hop -- only local for now */ + if (maxi == -1 && ctrl->twohop && pass == NMATCH_PASSES-1) { + for (j=xadj[i]; j= nvtxs) + continue; + for (jj=xadj[v]; jj= nvtxs) + continue; + if (k != i && match[k] == UNMATCHED) { + maxi = jj; + break; + } + } + if (maxi != -1) + break; + } + } + + + if (maxi != -1) { + k = adjncy[maxi]; + if (k < nvtxs) { /* Take care the local vertices first */ + /* Here we give preference the local matching by granting it right away */ + match[i] = firstvtx+k + (i <= k ? KEEP_BIT : 0); + match[k] = firstvtx+i + (i > k ? 
KEEP_BIT : 0); + changed[nchanged++] = i; + changed[nchanged++] = k; + } + else { /* Take care any remote boundary vertices */ + match[k] = MAYBE_MATCHED; + /* Alternate among which vertices will issue the requests */ + if ((wside == 0 && firstvtx+i < graph->imap[k]) || + (wside == 1 && firstvtx+i > graph->imap[k])) { + match[i] = MAYBE_MATCHED; + match_requests[nrequests].key = graph->imap[k]; + match_requests[nrequests].val = firstvtx+i; + nrequests++; + } + } + } + } + } + + + /*********************************************************** + * Exchange the match_requests, requests for me are stored in + * match_granted + ************************************************************/ + /* Issue the receives first. Note that from each PE can receive a maximum + of the interface node that it needs to send it in the case of a mat-vec */ + for (i=0; icomm, ctrl->rreq+i); + } + + /* Issue the sends next. This needs some work */ + ikvsorti(nrequests, match_requests); + for (j=i=0; icomm, ctrl->sreq+i); + j = k; + } + + /* OK, now get into the loop waiting for the operations to finish */ + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + for (i=0; istatuses+i, IDX_T, nreqs_pe+i); + nreqs_pe[i] = nreqs_pe[i]/2; /* Adjust for pairs of IDX_T */ + } + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + + + /*********************************************************** + * Now, go and service the requests that you received in + * match_granted + ************************************************************/ + RandomPermute(nnbrs, nperm, 0); + for (ii=0; ii= firstvtx && k < lastvtx, (ctrl, "%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", firstvtx, lastvtx, k, j, peind[i])); + /* myprintf(ctrl, "Requesting a match %"PRIDX" %"PRIDX"\n", pe_requests[j].key, pe_requests[j].val); */ + if (match[k-firstvtx] == UNMATCHED) { /* Bingo, lets grant this request */ + changed[nchanged++] = k-firstvtx; + if (nkept >= 0) { /* decide who to keep it based on local balance */ + match[k-firstvtx] 
= pe_requests[j].val + KEEP_BIT; + nkept--; + } + else { + match[k-firstvtx] = pe_requests[j].val; + pe_requests[j].key += KEEP_BIT; + nkept++; + } + /* myprintf(ctrl, "Request from pe:%"PRIDX" (%"PRIDX" %"PRIDX") granted!\n", peind[i], pe_requests[j].val, pe_requests[j].key); */ + } + else { /* We are not granting the request */ + /* myprintf(ctrl, "Request from pe:%"PRIDX" (%"PRIDX" %"PRIDX") not granted!\n", peind[i], pe_requests[j].val, pe_requests[j].key); */ + pe_requests[j].key = UNMATCHED; + } + } + } + + + /*********************************************************** + * Exchange the match_granted information. It is stored in + * match_requests + ************************************************************/ + /* Issue the receives first. Note that from each PE can receive a maximum + of the interface node that it needs to send during the case of a mat-vec */ + for (i=0; icomm, ctrl->rreq+i); + } + + /* Issue the sends next. */ + for (i=0; icomm, ctrl->sreq+i); + } + + /* OK, now get into the loop waiting for the operations to finish */ + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + for (i=0; istatuses+i, IDX_T, nreqs_pe+i); + nreqs_pe[i] = nreqs_pe[i]/2; /* Adjust for pairs of IDX_T */ + } + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + + + /*********************************************************** + * Now, go and through the match_requests and update local + * match information for the matchings that were granted. 
+ ************************************************************/ + for (i=0; i= KEEP_BIT) { /* A matched vertex which I get to keep */ + cnvtxs++; + } + } + + if (ctrl->dbglvl&DBG_MATCHINFO) { + PrintVector2(ctrl, nvtxs, firstvtx, match, "Match"); + myprintf(ctrl, "Cnvtxs: %"PRIDX"\n", cnvtxs); + rprintf(ctrl, "Done with matching...\n"); + } + + WCOREPOP; + + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); + IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr)); + + CreateCoarseGraph_Global(ctrl, graph, cnvtxs); + + if (ctrl->dropedges) + DropEdges(ctrl, graph->coarser); + + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr)); +} + + +/*************************************************************************/ +/*! Finds a HEM matching involving both local and remote vertices */ +/*************************************************************************/ +void Match_Global(ctrl_t *ctrl, graph_t *graph) +{ + idx_t h, i, ii, j, jj, k, v; + idx_t nnbrs, nvtxs, ncon, cnvtxs, firstvtx, lastvtx, maxi, maxidx, nkept; + idx_t otherlastvtx, nrequests, nchanged, pass, nmatched, wside; + idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *home, *myhome; + idx_t *match; + idx_t *peind, *sendptr, *recvptr; + idx_t *perm, *iperm, *nperm, *changed; + real_t *nvwgt, maxnvwgt; + idx_t *nreqs_pe; + ikv_t *match_requests, *match_granted, *pe_requests; + idx_t last_unmatched; + + WCOREPUSH; + + maxnvwgt = 0.75/((real_t)(ctrl->CoarsenTo)); + + graph->match_type = PARMETIS_MTYPE_GLOBAL; + + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + home = graph->home; + nvwgt = graph->nvwgt; + + vtxdist = graph->vtxdist; + firstvtx = vtxdist[ctrl->mype]; + lastvtx = 
vtxdist[ctrl->mype+1]; + nnbrs = graph->nnbrs; peind = graph->peind; sendptr = graph->sendptr; recvptr = graph->recvptr; - /* Use wspace->indices as the tmp space for matching info of the boundary - * vertices that are sent and received */ - rmatch = match + nvtxs; - smatch = wspace->indices; - changed = smatch+graph->nsend; + match = graph->match = ismalloc(nvtxs+graph->nrecv, UNMATCHED, "GlobalMatch: match"); - /* Use wspace->indices as the tmp space for match requests of the boundary - * vertices that are sent and received */ - match_requests = wspace->pairs; - match_granted = match_requests + graph->nsend; + /* wspacemalloc'ed arrays */ + myhome = iset(nvtxs+graph->nrecv, UNMATCHED, iwspacemalloc(ctrl, nvtxs+graph->nrecv)); + nreqs_pe = iset(nnbrs, 0, iwspacemalloc(ctrl, nnbrs)); + perm = iwspacemalloc(ctrl, nvtxs); + iperm = iwspacemalloc(ctrl, nvtxs); + nperm = iwspacemalloc(ctrl, nnbrs); + changed = iwspacemalloc(ctrl, nvtxs); - nreqs_pe = ismalloc(nnbrs, 0, "Match_HEM: nreqs_pe"); + match_requests = ikvwspacemalloc(ctrl, graph->nsend); + match_granted = ikvwspacemalloc(ctrl, graph->nrecv); - nkept = graph->gnvtxs/ctrl->npes - nvtxs; - perm = (idxtype *)wspace->degrees; - iperm = perm + nvtxs; + /* create the traversal order */ FastRandomPermute(nvtxs, perm, 1); for (i=0; ipartType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) { + PASSERT(ctrl, home != NULL); + icopy(nvtxs, home, myhome); + CommInterfaceData(ctrl, graph, myhome, myhome+nvtxs); + } - nperm = iperm + nvtxs; - for (i=0; ipartType == ORDER_PARTITION) { + PASSERT(ctrl, graph->where != NULL); + icopy(nvtxs, graph->where, myhome); + CommInterfaceData(ctrl, graph, myhome, myhome+nvtxs); + } - /* First mark all heavy vertices as TOO_HEAVY so they will not be matched */ + /* mark all heavy vertices as TOO_HEAVY so they will not be matched */ for (nchanged=i=0; i maxnvwgt) { match[i] = TOO_HEAVY; + nchanged++; break; } } @@ -126,12 +457,15 @@ void Match_Global(CtrlType *ctrl, GraphType 
*graph, WorkSpaceType *wspace) if (nchanged == 0) match[RandomInRange(nvtxs)] = TOO_HEAVY; - CommInterfaceData(ctrl, graph, match, smatch, rmatch); + CommInterfaceData(ctrl, graph, match, match+nvtxs); - /************************************************************* - * Go now and find a matching by doing multiple iterations - *************************************************************/ + /* set initial value of nkept based on how over/under weight the + partition is to begin with */ + nkept = graph->gnvtxs/ctrl->npes - nvtxs; + + + /* Find a matching by doing multiple iterations */ for (nmatched=pass=0; passlevel+pass)%2; nchanged = nrequests = 0; @@ -143,28 +477,23 @@ void Match_Global(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) /* Deal with islands. Find another vertex and match it with */ if (xadj[i] == xadj[i+1]) { - last_unmatched = amax(ii, last_unmatched)+1; + last_unmatched = gk_max(ii, last_unmatched)+1; for (; last_unmatched k ? KEEP_BIT : 0); + changed[nchanged++] = i; + changed[nchanged++] = k; break; } } continue; } - /* Find a heavy-edge matching. */ + /* Find a heavy-edge matching */ for (j=xadj[i]; j k ? 
KEEP_BIT : 0); changed[nchanged++] = i; changed[nchanged++] = k; } else { /* Take care any remote boundary vertices */ match[k] = MAYBE_MATCHED; /* Alternate among which vertices will issue the requests */ - if ((wside ==0 && firstvtx+i < graph->imap[k]) || (wside == 1 && firstvtx+i > graph->imap[k])) { + if ((wside == 0 && firstvtx+i < graph->imap[k]) || + (wside == 1 && firstvtx+i > graph->imap[k])) { match[i] = MAYBE_MATCHED; match_requests[nrequests].key = graph->imap[k]; match_requests[nrequests].val = firstvtx+i; @@ -214,12 +538,6 @@ void Match_Global(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) } -#ifdef DEBUG_MATCH - PrintVector2(ctrl, nvtxs, firstvtx, match, "Match1"); - myprintf(ctrl, "[c: %2d] Nlocal: %d, Nrequests: %d\n", c, nlocal, nrequests); -#endif - - /*********************************************************** * Exchange the match_requests, requests for me are stored in * match_granted @@ -227,26 +545,27 @@ void Match_Global(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) /* Issue the receives first. Note that from each PE can receive a maximum of the interface node that it needs to send it in the case of a mat-vec */ for (i=0; icomm, ctrl->rreq+i); } /* Issue the sends next. 
This needs some work */ - ikeysort(nrequests, match_requests); + ikvsorti(nrequests, match_requests); for (j=i=0; icomm, ctrl->sreq+i); + gkMPI_Isend((void *)(match_requests+j), 2*(k-j), IDX_T, peind[i], 1, + ctrl->comm, ctrl->sreq+i); j = k; } /* OK, now get into the loop waiting for the operations to finish */ - MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; istatuses+i, IDX_DATATYPE, nreqs_pe+i); - nreqs_pe[i] = nreqs_pe[i]/2; /* Adjust for pairs of IDX_DATATYPE */ + gkMPI_Get_count(ctrl->statuses+i, IDX_T, nreqs_pe+i); + nreqs_pe[i] = nreqs_pe[i]/2; /* Adjust for pairs of IDX_T */ } - MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*********************************************************** @@ -259,11 +578,11 @@ void Match_Global(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) pe_requests = match_granted+recvptr[i]; for (j=0; j= firstvtx && k < lastvtx, (ctrl, "%d %d %d %d %d\n", firstvtx, lastvtx, k, j, peind[i])); - /* myprintf(ctrl, "Requesting a match %d %d\n", pe_requests[j].key, pe_requests[j].val); */ + PASSERTP(ctrl, k >= firstvtx && k < lastvtx, (ctrl, "%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", firstvtx, lastvtx, k, j, peind[i])); + /* myprintf(ctrl, "Requesting a match %"PRIDX" %"PRIDX"\n", pe_requests[j].key, pe_requests[j].val); */ if (match[k-firstvtx] == UNMATCHED) { /* Bingo, lets grant this request */ changed[nchanged++] = k-firstvtx; - if (nkept >= 0) { /* Flip a coin for who gets it */ + if (nkept >= 0) { /* decide who to keep it based on local balance */ match[k-firstvtx] = pe_requests[j].val + KEEP_BIT; nkept--; } @@ -272,10 +591,10 @@ void Match_Global(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) pe_requests[j].key += KEEP_BIT; nkept++; } - /* myprintf(ctrl, "Request from pe:%d (%d %d) granted!\n", peind[i], pe_requests[j].val, pe_requests[j].key); */ + /* myprintf(ctrl, "Request from pe:%"PRIDX" 
(%"PRIDX" %"PRIDX") granted!\n", peind[i], pe_requests[j].val, pe_requests[j].key); */ } else { /* We are not granting the request */ - /* myprintf(ctrl, "Request from pe:%d (%d %d) not granted!\n", peind[i], pe_requests[j].val, pe_requests[j].key); */ + /* myprintf(ctrl, "Request from pe:%"PRIDX" (%"PRIDX" %"PRIDX") not granted!\n", peind[i], pe_requests[j].val, pe_requests[j].key); */ pe_requests[j].key = UNMATCHED; } } @@ -289,23 +608,23 @@ void Match_Global(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) /* Issue the receives first. Note that from each PE can receive a maximum of the interface node that it needs to send during the case of a mat-vec */ for (i=0; icomm, ctrl->rreq+i); } /* Issue the sends next. */ for (i=0; icomm, ctrl->sreq+i); } /* OK, now get into the loop waiting for the operations to finish */ - MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; istatuses+i, IDX_DATATYPE, nreqs_pe+i); - nreqs_pe[i] = nreqs_pe[i]/2; /* Adjust for pairs of IDX_DATATYPE */ + gkMPI_Get_count(ctrl->statuses+i, IDX_T, nreqs_pe+i); + nreqs_pe[i] = nreqs_pe[i]/2; /* Adjust for pairs of IDX_T */ } - MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*********************************************************** @@ -328,7 +647,68 @@ void Match_Global(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) nmatched++; } - CommChangedInterfaceData(ctrl, graph, nchanged, changed, match, match_requests, match_granted, wspace->pv4); + CommChangedInterfaceData(ctrl, graph, nchanged, changed, match, + match_requests, match_granted); + } + + + /* Find a two-hop matching */ + if (ctrl->twohop) { + WCOREPUSH; + + /* create the "transposed" adjancency list that includes remote vertices */ + idx_t rnvtxs = nvtxs+graph->nrecv; + idx_t *rxadj = iset(rnvtxs+1, 0, iwspacemalloc(ctrl, rnvtxs+1)); + idx_t *radjncy = iwspacemalloc(ctrl, xadj[nvtxs]); + for (i=0; i k ? 
KEEP_BIT : 0); + changed[nchanged++] = i; + changed[nchanged++] = k; + maxi = 1; + break; + } + } + if (maxi) + break; + } + } + } + + for (i=0; idbglvl&DBG_MATCHINFO) { PrintVector2(ctrl, nvtxs, firstvtx, match, "Match"); - myprintf(ctrl, "Cnvtxs: %d\n", cnvtxs); + myprintf(ctrl, "Cnvtxs: %"PRIDX"\n", cnvtxs); rprintf(ctrl, "Done with matching...\n"); } - GKfree((void **)(&myhome), (void **)(&nreqs_pe), LTERM); + WCOREPOP; - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr)); - CreateCoarseGraph_Global(ctrl, graph, wspace, cnvtxs); + CreateCoarseGraph_Global(ctrl, graph, cnvtxs); - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); - IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr)); + if (ctrl->dropedges) + DropEdges(ctrl, graph->coarser); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr)); } /*************************************************************************/ /*! 
Finds a HEM matching involving only local vertices */ /*************************************************************************/ -void Match_Local(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void Match_Local(ctrl_t *ctrl, graph_t *graph) { - int h, i, ii, j, k; - int nvtxs, ncon, cnvtxs, firstvtx, maxi, maxidx, edge; - idxtype *xadj, *ladjncy, *adjwgt, *vtxdist, *home, *myhome, *shome, *rhome; - idxtype *perm, *match; - float maxnvwgt, *nvwgt; + idx_t h, i, ii, j, k; + idx_t nvtxs, ncon, cnvtxs, firstvtx, maxi, maxidx, edge; + idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *home, *myhome; + idx_t *perm, *match; + real_t maxnvwgt, *nvwgt; + + WCOREPUSH; - ASSERT(ctrl, wspace->nlarge > graph->xadj[graph->nvtxs]); + maxnvwgt = 0.75/((real_t)ctrl->CoarsenTo); graph->match_type = PARMETIS_MTYPE_LOCAL; - maxnvwgt = 0.75/((float)ctrl->CoarsenTo); - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - ladjncy = graph->adjncy; - adjwgt = graph->adjwgt; - home = graph->home; + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + nvwgt = graph->nvwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + home = graph->home; vtxdist = graph->vtxdist; firstvtx = vtxdist[ctrl->mype]; - match = graph->match = idxmalloc(nvtxs+graph->nrecv, "HEM_Match: match"); - myhome = idxsmalloc(nvtxs+graph->nrecv, UNMATCHED, "HEM_Match: myhome"); + match = graph->match = imalloc(nvtxs+graph->nrecv, "HEM_Match: match"); - idxset(nvtxs, UNMATCHED, match); - idxset(graph->nrecv, 0, match+nvtxs); /* Easy way to handle remote vertices */ + /* wspacemalloc'ed arrays */ + myhome = iset(nvtxs+graph->nrecv, UNMATCHED, iwspacemalloc(ctrl, nvtxs+graph->nrecv)); + perm = iwspacemalloc(ctrl, nvtxs); - /*------------------------------------------------------------ - / 
Send/Receive the home information of interface vertices - /------------------------------------------------------------*/ + + /* if coasening for adaptive/repartition, exchange home information */ if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) { - idxcopy(nvtxs, home, myhome); - shome = wspace->indices; - rhome = myhome + nvtxs; - CommInterfaceData(ctrl, graph, myhome, shome, rhome); + icopy(nvtxs, home, myhome); + CommInterfaceData(ctrl, graph, myhome, myhome+nvtxs); } + /************************************************************* * Go now and find a local matching *************************************************************/ - perm = wspace->indices; + iset(nvtxs, UNMATCHED, match); + iset(graph->nrecv, 0, match+nvtxs); /* Easy way to handle remote vertices */ + FastRandomPermute(nvtxs, perm, 1); - cnvtxs = 0; - for (ii=0; ii= nvtxs) @@ -451,25 +833,18 @@ void Match_Local(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) } if (maxi != -1) { - k = ladjncy[maxi]; - if (i <= k) { - match[i] = firstvtx+k + KEEP_BIT; - match[k] = firstvtx+i; - } - else { - match[i] = firstvtx+k; - match[k] = firstvtx+i + KEEP_BIT; - } + k = adjncy[maxi]; + match[i] = firstvtx+k + (i <= k ? KEEP_BIT : 0); + match[k] = firstvtx+i + (i > k ? 
KEEP_BIT : 0); } else { - match[i] = (firstvtx+i) + KEEP_BIT; + match[i] = firstvtx+i + KEEP_BIT; } cnvtxs++; } } - CommInterfaceData(ctrl, graph, match, wspace->indices, match+nvtxs); - GKfree((void **)(&myhome), LTERM); + CommInterfaceData(ctrl, graph, match, match+nvtxs); #ifdef DEBUG_MATCH PrintVector2(ctrl, nvtxs, firstvtx, match, "Match1"); @@ -478,15 +853,17 @@ void Match_Local(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) if (ctrl->dbglvl&DBG_MATCHINFO) { PrintVector2(ctrl, nvtxs, firstvtx, match, "Match"); - myprintf(ctrl, "Cnvtxs: %d\n", cnvtxs); + myprintf(ctrl, "Cnvtxs: %"PRIDX"\n", cnvtxs); rprintf(ctrl, "Done with matching...\n"); } - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); + WCOREPOP; + + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr)); - CreateCoarseGraph_Local(ctrl, graph, wspace, cnvtxs); + CreateCoarseGraph_Local(ctrl, graph, cnvtxs); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr)); } @@ -495,55 +872,47 @@ void Match_Local(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) /*************************************************************************/ /*! 
This function creates the coarser graph after a global matching */ /*************************************************************************/ -void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, - WorkSpaceType *wspace, int cnvtxs) +void CreateCoarseGraph_Global(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs) { - int h, i, j, k, l, ii, jj, ll, nnbrs, nvtxs, nedges, ncon; - int firstvtx, lastvtx, cfirstvtx, clastvtx, otherlastvtx; - int npes=ctrl->npes, mype=ctrl->mype; - int cnedges, nsend, nrecv, nkeepsize, nrecvsize, nsendsize, v, u; - idxtype *xadj, *ladjncy, *adjwgt, *vwgt, *vsize, *vtxdist, *home, *where; - idxtype *match, *cmap, *rcmap, *scmap; - idxtype *cxadj, *cadjncy, *cadjwgt, *cvwgt, *cvsize = NULL, *chome = NULL, + idx_t h, i, j, k, l, ii, jj, ll, nnbrs, nvtxs, nedges, ncon; + idx_t firstvtx, lastvtx, cfirstvtx, clastvtx, otherlastvtx; + idx_t npes=ctrl->npes, mype=ctrl->mype; + idx_t cnedges, nsend, nrecv, nkeepsize, nrecvsize, nsendsize, v, u; + idx_t *xadj, *adjncy, *adjwgt, *vwgt, *vsize, *vtxdist, *home, *where; + idx_t *match, *cmap; + idx_t *cxadj, *cadjncy, *cadjwgt, *cvwgt, *cvsize = NULL, *chome = NULL, *cwhere = NULL, *cvtxdist; - idxtype *rsizes, *ssizes, *rlens, *slens, *rgraph, *sgraph, *perm; - idxtype *peind, *recvptr, *recvind; - float *nvwgt, *cnvwgt; - GraphType *cgraph; - KeyValueType *scand, *rcand; - int htable[LHTSIZE], htableidx[LHTSIZE]; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - - vtxdist = graph->vtxdist; - xadj = graph->xadj; - vwgt = graph->vwgt; - vsize = graph->vsize; - nvwgt = graph->nvwgt; - home = graph->home; - where = graph->where; - ladjncy = graph->adjncy; - adjwgt = graph->adjwgt; - - match = graph->match; + idx_t *rsizes, *ssizes, *rlens, *slens, *rgraph, *sgraph, *perm; + idx_t *peind, *recvptr, *recvind; + real_t *nvwgt, *cnvwgt; + graph_t *cgraph; + ikv_t *scand, *rcand; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + vsize = 
graph->vsize; + nvwgt = graph->nvwgt; + home = graph->home; + where = graph->where; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + match = graph->match; + vtxdist = graph->vtxdist; firstvtx = vtxdist[mype]; lastvtx = vtxdist[mype+1]; - cmap = graph->cmap = idxmalloc(nvtxs+graph->nrecv, "Global_CreateCoarseGraph: cmap"); + cmap = graph->cmap = ismalloc(nvtxs+graph->nrecv, -1, "Global_CreateCoarseGraph: cmap"); nnbrs = graph->nnbrs; peind = graph->peind; recvind = graph->recvind; recvptr = graph->recvptr; - /* Use wspace->indices as the tmp space for map of the boundary - * vertices that are sent and received */ - scmap = wspace->indices; - rcmap = cmap + nvtxs; - - /* Initialize the coarser graph */ cgraph = CreateGraph(); cgraph->nvtxs = cnvtxs; @@ -553,15 +922,14 @@ void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, graph->coarser = cgraph; - /************************************************************* * Obtain the vtxdist of the coarser graph **************************************************************/ - cvtxdist = cgraph->vtxdist = idxmalloc(npes+1, "Global_CreateCoarseGraph: cvtxdist"); + cvtxdist = cgraph->vtxdist = imalloc(npes+1, "Global_CreateCoarseGraph: cvtxdist"); cvtxdist[npes] = cnvtxs; /* Use last position in the cvtxdist as a temp buffer */ - MPI_Allgather((void *)(cvtxdist+npes), 1, IDX_DATATYPE, (void *)cvtxdist, 1, - IDX_DATATYPE, ctrl->comm); + gkMPI_Allgather((void *)(cvtxdist+npes), 1, IDX_T, (void *)cvtxdist, 1, + IDX_T, ctrl->comm); MAKECSR(i, npes, cvtxdist); @@ -578,9 +946,8 @@ void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, cfirstvtx = cvtxdist[mype]; clastvtx = cvtxdist[mype+1]; - /* Create the cmap of what you know so far locally */ - cnvtxs = 0; - for (i=0; i= KEEP_BIT) { k = match[i] - KEEP_BIT; if (k>=firstvtx && knrecv, recvind, match[i])]; + if (match[i] < KEEP_BIT) { /* Only vertices that go away satisfy this */ + cmap[i] = cmap[nvtxs+BSearch(graph->nrecv, recvind, match[i])]; } } - 
CommInterfaceData(ctrl, graph, cmap, scmap, rcmap); + CommInterfaceData(ctrl, graph, cmap, cmap+nvtxs); + + +#ifndef NDEBUG + for (i=0; inrecv; i++) { + if (cmap[i] == -1) + errexit("cmap[%"PRIDX"] == -1\n", i); + } +#endif #ifdef DEBUG_CONTRACT @@ -616,10 +991,21 @@ void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, /************************************************************* * Determine how many adjcency lists you need to send/receive. **************************************************************/ - /* Use wspace->pairs as the tmp space for the boundary vertices that are sent and received */ - scand = wspace->pairs; - rcand = graph->rcand = (KeyValueType *)GKmalloc(recvptr[nnbrs]*sizeof(KeyValueType), "CreateCoarseGraph: rcand"); + /* first pass: determine sizes */ + for (nsend=0, nrecv=0, i=0; i=lastvtx) /* This is comming from afar */ + nrecv++; + } + } + scand = ikvwspacemalloc(ctrl, nsend); + rcand = graph->rcand = ikvmalloc(nrecv, "CreateCoarseGraph: rcand"); + + /* second pass: place them in the appropriate arrays */ nkeepsize = nsend = nrecv = 0; for (i=0; i=lastvtx) { /* This is comming from afar */ rcand[nrecv].key = k; rcand[nrecv].val = cmap[i] - cfirstvtx; /* Set it for use during the partition projection */ - ASSERT(ctrl, rcand[nrecv].val>=0 && rcand[nrecv].val=0 && rcand[nrecv].valpv1; - ssizes = wspace->pv2; - idxset(nnbrs, 0, ssizes); - idxset(nnbrs, 0, rsizes); - rlens = graph->rlens = idxmalloc(nnbrs+1, "CreateCoarseGraph: graph->rlens"); - slens = graph->slens = idxmalloc(nnbrs+1, "CreateCoarseGraph: graph->slens"); + rlens = graph->rlens = imalloc(nnbrs+1, "CreateCoarseGraph: graph->rlens"); + slens = graph->slens = imalloc(nnbrs+1, "CreateCoarseGraph: graph->slens"); + + rsizes = iset(nnbrs, 0, iwspacemalloc(ctrl, nnbrs)); + ssizes = iset(nnbrs, 0, iwspacemalloc(ctrl, nnbrs)); /* Take care the sending data first */ - ikeyvalsort(nsend, scand); + ikvsortii(nsend, scand); slens[0] = 0; for (k=i=0; i 0) /* Issue a receive only if 
you are getting something */ - MPI_Irecv((void *)(rsizes+i), 1, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); + gkMPI_Irecv((void *)(rsizes+i), 1, IDX_T, peind[i], 1, ctrl->comm, ctrl->rreq+i); } /* Take care the sending data next */ for (i=0; i 0) /* Issue a send only if you are sending something */ - MPI_Isend((void *)(ssizes+i), 1, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); + gkMPI_Isend((void *)(ssizes+i), 1, IDX_T, peind[i], 1, ctrl->comm, ctrl->sreq+i); } /* OK, now get into the loop waiting for the operations to finish */ for (i=0; i 0) - MPI_Wait(ctrl->rreq+i, &ctrl->status); + gkMPI_Wait(ctrl->rreq+i, &ctrl->status); } for (i=0; i 0) - MPI_Wait(ctrl->sreq+i, &ctrl->status); + gkMPI_Wait(ctrl->sreq+i, &ctrl->status); } @@ -718,22 +1103,19 @@ void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, * rgraph and sgraph is a different data structure than CSR * to facilitate single message exchange. **************************************************************/ - nrecvsize = idxsum(nnbrs, rsizes); - nsendsize = idxsum(nnbrs, ssizes); - if ((4+ncon)*(nrecv+nsend) + 2*(nrecvsize+nsendsize) <= wspace->nlarge) { - rgraph = (idxtype *)wspace->degrees; - sgraph = rgraph + (4+ncon)*nrecv+2*nrecvsize; - } - else { - rgraph = idxmalloc((4+ncon)*nrecv+2*nrecvsize, "CreateCoarseGraph: rgraph"); - sgraph = idxmalloc((4+ncon)*nsend+2*nsendsize, "CreateCoarseGraph: sgraph"); - } + nrecvsize = isum(nnbrs, rsizes, 1); + nsendsize = isum(nnbrs, ssizes, 1); + rgraph = iwspacemalloc(ctrl, (4+ncon)*nrecv+2*nrecvsize); + + WCOREPUSH; /* for freeing sgraph right away */ + sgraph = iwspacemalloc(ctrl, (4+ncon)*nsend+2*nsendsize); /* Deal with the received portion first */ for (l=i=0; i 0) { - MPI_Irecv((void *)(rgraph+l), (4+ncon)*(rlens[i+1]-rlens[i])+2*rsizes[i], IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); + gkMPI_Irecv((void *)(rgraph+l), (4+ncon)*(rlens[i+1]-rlens[i])+2*rsizes[i], + IDX_T, peind[i], 1, ctrl->comm, ctrl->rreq+i); l += 
(4+ncon)*(rlens[i+1]-rlens[i])+2*rsizes[i]; } } @@ -753,15 +1135,15 @@ void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, sgraph[ll++] = (ctrl->partType == STATIC_PARTITION || ctrl->partType == ORDER_PARTITION ? -1 : home[ii]); for (jj=xadj[ii]; jjcomm, ctrl->sreq+i); + /*myprintf(ctrl, "Sending to pe:%"PRIDX", %"PRIDX" lists of size %"PRIDX"\n", peind[i], slens[i+1]-slens[i], ssizes[i]); */ + gkMPI_Isend((void *)(sgraph+l), ll-l, IDX_T, peind[i], 1, ctrl->comm, ctrl->sreq+i); l = ll; } } @@ -769,11 +1151,11 @@ void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, /* OK, now get into the loop waiting for the operations to finish */ for (i=0; i 0) - MPI_Wait(ctrl->rreq+i, &ctrl->status); + gkMPI_Wait(ctrl->rreq+i, &ctrl->status); } for (i=0; i 0) - MPI_Wait(ctrl->sreq+i, &ctrl->status); + gkMPI_Wait(ctrl->sreq+i, &ctrl->status); } @@ -782,11 +1164,13 @@ void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, PrintTransferedGraphs(ctrl, nnbrs, peind, slens, rlens, sgraph, rgraph); #endif + WCOREPOP; /* free sgraph */ + /************************************************************* * Setup the mapping from indices returned by BSearch to * those that are actually stored **************************************************************/ - perm = idxsmalloc(recvptr[nnbrs], -1, "CreateCoarseGraph: perm"); + perm = iset(graph->nrecv, -1, iwspacemalloc(ctrl, graph->nrecv)); for (j=i=0; inrecv, recvind, rgraph[j])] = j+1; j += (4+ncon)+2*rgraph[j+1]; @@ -795,20 +1179,29 @@ void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, /************************************************************* * Finally, create the coarser graph **************************************************************/ - /* Allocate memory for the coarser graph, and fire up coarsening */ - cxadj = cgraph->xadj = idxmalloc(cnvtxs+1, "CreateCoarserGraph: cxadj"); - cvwgt = cgraph->vwgt = idxmalloc(cnvtxs*ncon, "CreateCoarserGraph: cvwgt"); - cnvwgt = cgraph->nvwgt = 
fmalloc(cnvtxs*ncon, "CreateCoarserGraph: cnvwgt"); - cadjncy = idxmalloc(nkeepsize+nrecvsize, "CreateCoarserGraph: cadjncy"); - cadjwgt = idxmalloc(nkeepsize+nrecvsize, "CreateCoarserGraph: cadjwgt"); + /* Allocate memory for the coarser graph, and start creating the coarse graph */ + cxadj = cgraph->xadj = imalloc(cnvtxs+1, "CreateCoarserGraph: cxadj"); + cvwgt = cgraph->vwgt = imalloc(cnvtxs*ncon, "CreateCoarserGraph: cvwgt"); + cnvwgt = cgraph->nvwgt = rmalloc(cnvtxs*ncon, "CreateCoarserGraph: cnvwgt"); if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) { - cvsize = cgraph->vsize = idxmalloc(cnvtxs, "CreateCoarserGraph: cvsize"); - chome = cgraph->home = idxmalloc(cnvtxs, "CreateCoarserGraph: chome"); + cvsize = cgraph->vsize = imalloc(cnvtxs, "CreateCoarserGraph: cvsize"); + chome = cgraph->home = imalloc(cnvtxs, "CreateCoarserGraph: chome"); } if (where != NULL) - cwhere = cgraph->where = idxmalloc(cnvtxs, "CreateCoarserGraph: cwhere"); + cwhere = cgraph->where = imalloc(cnvtxs, "CreateCoarserGraph: cwhere"); + if (ctrl->dropedges) + cgraph->unmatched = imalloc(cnvtxs, "CreateCoarserGraph: cgraph->unmatched"); - iset(LHTSIZE, -1, htable); + /* these are just upper bound estimates for now */ + cadjncy = iwspacemalloc(ctrl, nkeepsize+nrecvsize); + cadjwgt = iwspacemalloc(ctrl, nkeepsize+nrecvsize); + + + /* variables used for the htable */ + idx_t kk, m; + idx_t htsize, maxhtsize=8192, mask, maxclen; + idx_t *htable=NULL; + htable = ismalloc(maxhtsize, -1, "htable"); cxadj[0] = cnvtxs = cnedges = 0; for (i=0; i=firstvtx && u u) continue; /* I have already collapsed it as (u,v) */ + if (ctrl->dropedges) + cgraph->unmatched[cnvtxs] = (v == u ? 
1 : 0); + + /* determine the maximum length of the combined adjacency list + and the size of the required htable */ + maxclen = xadj[i+1]-xadj[i]; + if (v != u) { + if (u>=firstvtx && unrecv, recvind, u)]]; + } + for (maxclen*=2, htsize=1; htsizepartType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) { + + if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) cvsize[cnvtxs] += vsize[u]; - /* chome[cnvtxs] = home[u]; */ - } for (j=xadj[u]; jpartType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) { - cvsize[cnvtxs] += rgraph[u+1+ncon]; - chome[cnvtxs] = rgraph[u+2+ncon]; - } + + if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) { + cvsize[cnvtxs] += rgraph[u+1+ncon]; + chome[cnvtxs] = rgraph[u+2+ncon]; + } + for (j=0; jnedges = cnedges; /* ADD: In order to keep from having to change this too much */ /* ADD: I kept vwgt array and recomputed nvwgt for each coarser graph */ - for (j=0; jnvwgt[j*ncon+h] = (float)(cvwgt[j*ncon+h])/(float)(ctrl->tvwgts[h]); + cgraph->nvwgt[j*ncon+h] = ctrl->invtvwgts[h]*cvwgt[j*ncon+h]; + } - cgraph->adjncy = idxmalloc(cnedges, "CreateCoarserGraph: cadjncy"); - cgraph->adjwgt = idxmalloc(cnedges, "CreateCoarserGraph: cadjwgt"); - idxcopy(cnedges, cadjncy, cgraph->adjncy); - idxcopy(cnedges, cadjwgt, cgraph->adjwgt); + cgraph->adjncy = imalloc(cnedges, "CreateCoarserGraph: cadjncy"); + cgraph->adjwgt = imalloc(cnedges, "CreateCoarserGraph: cadjwgt"); + icopy(cnedges, cadjncy, cgraph->adjncy); + icopy(cnedges, cadjwgt, cgraph->adjwgt); - /* Note that graph->where works fine even if it is NULL */ - GKfree((void **)&cadjncy, &cadjwgt, &graph->where, &perm, LTERM); + WCOREPOP; - if (rgraph != (idxtype *)wspace->degrees) - GKfree((void **)&rgraph, &sgraph, LTERM); + /* Note that graph->where works fine even if it is NULL */ + gk_free((void **)&graph->where, LTERM); } @@ -976,42 +1353,43 @@ void CreateCoarseGraph_Global(CtrlType *ctrl, GraphType *graph, 
/*************************************************************************/ /*! This function creates the coarser graph after a local matching */ /*************************************************************************/ -void CreateCoarseGraph_Local(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int cnvtxs) +void CreateCoarseGraph_Local(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs) { - int h, i, j, k, l; - int nvtxs, ncon, nedges, firstvtx, cfirstvtx; - int npes=ctrl->npes, mype=ctrl->mype; - int cnedges, v, u; - idxtype *xadj, *vwgt, *vsize, *ladjncy, *adjwgt, *vtxdist, *where, *home; - idxtype *match, *cmap; - idxtype *cxadj, *cvwgt, *cvsize = NULL, *cadjncy, *cadjwgt, *cvtxdist, - *chome = NULL, *cwhere = NULL; - float *cnvwgt; - GraphType *cgraph; - int htable[LHTSIZE], htableidx[LHTSIZE]; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - - vtxdist = graph->vtxdist; - xadj = graph->xadj; - vwgt = graph->vwgt; - home = graph->home; - vsize = graph->vsize; - ladjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - match = graph->match; + idx_t h, i, j, k, l; + idx_t nvtxs, ncon, nedges, firstvtx, cfirstvtx; + idx_t npes=ctrl->npes, mype=ctrl->mype; + idx_t cnedges, v, u; + idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *vtxdist, *where, *home; + idx_t *match, *cmap; + idx_t *cxadj, *cvwgt, *cvsize = NULL, *cadjncy, *cadjwgt, *cvtxdist, + *chome = NULL, *cwhere = NULL; + real_t *cnvwgt; + graph_t *cgraph; + idx_t htable[LHTSIZE], htableidx[LHTSIZE]; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + home = graph->home; + vsize = graph->vsize; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + where = graph->where; + match = graph->match; + vtxdist = graph->vtxdist; firstvtx = vtxdist[mype]; - cmap = graph->cmap = idxmalloc(nvtxs+graph->nrecv, "CreateCoarseGraph: cmap"); + cmap = graph->cmap = ismalloc(nvtxs+graph->nrecv, -1, "CreateCoarseGraph: cmap"); /* Initialize the 
coarser graph */ cgraph = CreateGraph(); cgraph->nvtxs = cnvtxs; cgraph->level = graph->level+1; - cgraph->ncon = ncon; + cgraph->ncon = ncon; cgraph->finer = graph; graph->coarser = cgraph; @@ -1020,10 +1398,10 @@ void CreateCoarseGraph_Local(CtrlType *ctrl, GraphType *graph, WorkSpaceType *ws /************************************************************* * Obtain the vtxdist of the coarser graph **************************************************************/ - cvtxdist = cgraph->vtxdist = idxmalloc(npes+1, "CreateCoarseGraph: cvtxdist"); + cvtxdist = cgraph->vtxdist = imalloc(npes+1, "CreateCoarseGraph: cvtxdist"); cvtxdist[npes] = cnvtxs; /* Use last position in the cvtxdist as a temp buffer */ - MPI_Allgather((void *)(cvtxdist+npes), 1, IDX_DATATYPE, (void *)cvtxdist, 1, IDX_DATATYPE, ctrl->comm); + gkMPI_Allgather((void *)(cvtxdist+npes), 1, IDX_T, (void *)cvtxdist, 1, IDX_T, ctrl->comm); MAKECSR(i, npes, cvtxdist); @@ -1055,7 +1433,7 @@ void CreateCoarseGraph_Local(CtrlType *ctrl, GraphType *graph, WorkSpaceType *ws } } - CommInterfaceData(ctrl, graph, cmap, wspace->indices, cmap+nvtxs); + CommInterfaceData(ctrl, graph, cmap, cmap+nvtxs); #ifdef DEBUG_CONTRACT @@ -1068,17 +1446,18 @@ void CreateCoarseGraph_Local(CtrlType *ctrl, GraphType *graph, WorkSpaceType *ws * Finally, create the coarser graph **************************************************************/ /* Allocate memory for the coarser graph, and fire up coarsening */ - cxadj = cgraph->xadj = idxmalloc(cnvtxs+1, "CreateCoarserGraph: cxadj"); - cvwgt = cgraph->vwgt = idxmalloc(cnvtxs*ncon, "CreateCoarserGraph: cvwgt"); - cnvwgt = cgraph->nvwgt = fmalloc(cnvtxs*ncon, "CreateCoarserGraph: cnvwgt"); - cadjncy = idxmalloc(graph->nedges, "CreateCoarserGraph: cadjncy"); - cadjwgt = idxmalloc(graph->nedges, "CreateCoarserGraph: cadjwgt"); + cxadj = cgraph->xadj = imalloc(cnvtxs+1, "CreateCoarserGraph: cxadj"); + cvwgt = cgraph->vwgt = imalloc(cnvtxs*ncon, "CreateCoarserGraph: cvwgt"); + cnvwgt = 
cgraph->nvwgt = rmalloc(cnvtxs*ncon, "CreateCoarserGraph: cnvwgt"); if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) - chome = cgraph->home = idxmalloc(cnvtxs, "CreateCoarserGraph: chome"); + chome = cgraph->home = imalloc(cnvtxs, "CreateCoarserGraph: chome"); if (vsize != NULL) - cvsize = cgraph->vsize = idxmalloc(cnvtxs, "CreateCoarserGraph: cvsize"); + cvsize = cgraph->vsize = imalloc(cnvtxs, "CreateCoarserGraph: cvsize"); if (where != NULL) - cwhere = cgraph->where = idxmalloc(cnvtxs, "CreateCoarserGraph: cwhere"); + cwhere = cgraph->where = imalloc(cnvtxs, "CreateCoarserGraph: cwhere"); + + cadjncy = iwspacemalloc(ctrl, graph->nedges); + cadjwgt = iwspacemalloc(ctrl, graph->nedges); iset(LHTSIZE, -1, htable); @@ -1102,7 +1481,7 @@ void CreateCoarseGraph_Local(CtrlType *ctrl, GraphType *graph, WorkSpaceType *ws nedges = 0; for (j=xadj[i]; jnedges = cnedges; - for (j=0; jnvwgt[j*ncon+h] = (float)(cvwgt[j*ncon+h])/(float)(ctrl->tvwgts[h]); + for (j=0; jnvwgt[j*ncon+h] = ctrl->invtvwgts[h]*cvwgt[j*ncon+h]; + } + + cgraph->adjncy = imalloc(cnedges, "CreateCoarserGraph: cadjncy"); + cgraph->adjwgt = imalloc(cnedges, "CreateCoarserGraph: cadjwgt"); + icopy(cnedges, cadjncy, cgraph->adjncy); + icopy(cnedges, cadjwgt, cgraph->adjwgt); - cgraph->adjncy = idxmalloc(cnedges, "CreateCoarserGraph: cadjncy"); - cgraph->adjwgt = idxmalloc(cnedges, "CreateCoarserGraph: cadjwgt"); - idxcopy(cnedges, cadjncy, cgraph->adjncy); - idxcopy(cnedges, cadjwgt, cgraph->adjwgt); + WCOREPOP; /* Note that graph->where works fine even if it is NULL */ - GKfree((void **)&cadjncy, (void **)&cadjwgt, (void **)&graph->where, LTERM); + gk_free((void **)&graph->where, LTERM); } +/*************************************************************************/ +/*! This function drops some of the edges of the graph to reduce memory + consumption. 
*/ +/*************************************************************************/ +void DropEdges(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, k, istart, iend, nvtxs, maxdegree; + idx_t *xadj, *adjncy, *adjwgt, *imap, *unmatched; + idx_t *noise, *medianwgts, *keys; + + WCOREPUSH; + + CommSetup(ctrl, graph); + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + unmatched = graph->unmatched; + imap = graph->imap; + + //myprintf(ctrl, ">nedges: %"PRIDX"\n", xadj[nvtxs]); + + maxdegree = xadj[1]; + for (i=1; inrecv); + noise = iwspacemalloc(ctrl, nvtxs+graph->nrecv); + keys = iwspacemalloc(ctrl, maxdegree+1); + + for (i=0; i>1)]; + } + } + CommInterfaceData(ctrl, graph, medianwgts, medianwgts+nvtxs); + + /* compact the adjacency structure of the coarser graph to keep only +ve edges */ + for (k=0, i=0; i= gk_min(medianwgts[i], medianwgts[ii])) { + adjncy[k] = imap[adjncy[j]]; /* keep and switch to global ID space */ + adjwgt[k++] = adjwgt[j]; + } + } + xadj[i] = k; + } + SHIFTCSR(i, nvtxs, xadj); + + graph->nedges = xadj[nvtxs]; + + //myprintf(ctrl, "unmatched, LTERM); + + FreeCommSetupFields(graph); + + WCOREPOP; + +} diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mdiffusion.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mdiffusion.c index 83b1b111..7a0d4019 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mdiffusion.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mdiffusion.c @@ -7,7 +7,7 @@ * Started 9/16/99 * George * - * $Id: mdiffusion.c,v 1.2 2003/07/21 17:18:50 karypis Exp $ + * $Id: mdiffusion.c 10542 2011-07-11 16:56:22Z karypis $ */ #include @@ -19,124 +19,119 @@ * This algorithm assembles the graph to all the processors and preceed * serially. 
**************************************************************************/ -int Mc_Diffusion(CtrlType *ctrl, GraphType *graph, idxtype *vtxdist, - idxtype *where, idxtype *home, WorkSpaceType *wspace, int npasses) +idx_t Mc_Diffusion(ctrl_t *ctrl, graph_t *graph, idx_t *vtxdist, idx_t *where, + idx_t *home, idx_t npasses) { - int h, i, j; - int nvtxs, nedges, ncon, pass, iter, domain, processor; - int nparts, mype, npes, nlinks, me, you, wsize; - int nvisited, nswaps = -1, tnswaps, done, alldone = -1; - idxtype *rowptr, *colind, *diff_where, *sr_where, *ehome, *map, *rmap; - idxtype *pack, *unpack, *match, *proc2sub, *sub2proc; - idxtype *visited, *gvisited; - float *transfer, *npwgts, maxdiff, minflow, maxflow; - float lbavg, oldlbavg, ubavg, lbvec[MAXNCON]; - float diff_flows[MAXNCON], sr_flows[MAXNCON]; - float diff_lbavg, sr_lbavg, diff_cost, sr_cost; - idxtype *rbuffer, *sbuffer; - int *rcount, *rdispl; - float *solution, *load, *workspace; - EdgeType *degrees; - MatrixType matrix; - GraphType *egraph; - RInfoType *rinfo; + idx_t h, i, j; + idx_t nvtxs, nedges, ncon, pass, iter, domain, processor; + idx_t nparts, mype, npes, nlinks, me, you, wsize; + idx_t nvisited, nswaps = -1, tnswaps, done, alldone = -1; + idx_t *rowptr, *colind, *diff_where, *sr_where, *ehome, *map, *rmap; + idx_t *pack, *unpack, *match, *proc2sub, *sub2proc; + idx_t *visited, *gvisited; + real_t *transfer, *npwgts, maxdiff, minflow, maxflow; + real_t lbavg, oldlbavg, ubavg, *lbvec; + real_t *diff_flows, *sr_flows; + real_t diff_lbavg, sr_lbavg, diff_cost, sr_cost; + idx_t *rbuffer, *sbuffer; + idx_t *rcount, *rdispl; + real_t *solution, *load, *workspace; + matrix_t matrix; + graph_t *egraph; if (graph->ncon > 3) return 0; - nvtxs = graph->nvtxs; + WCOREPUSH; + + nvtxs = graph->nvtxs; nedges = graph->nedges; - ncon = graph->ncon; + ncon = graph->ncon; nparts = ctrl->nparts; - mype = ctrl->mype; - npes = ctrl->npes; - ubavg = savg(ncon, ctrl->ubvec); + mype = ctrl->mype; + npes = 
ctrl->npes; + ubavg = ravg(ncon, ctrl->ubvec); - /********************************************/ /* initialize variables and allocate memory */ - /********************************************/ - load = fmalloc(nparts*(2+ncon)+nedges*(1+ncon), "load"); - solution = load + nparts; - npwgts = graph->gnpwgts = load + 2*nparts; - matrix.values = load + (2+ncon)*nparts; - transfer = matrix.transfer = load + (2+ncon)*nparts + nedges; - - proc2sub = idxmalloc(amax(nparts, npes*2), "Mc_Diffusion: proc2sub"); - sub2proc = idxmalloc(nparts*3+nedges+1, "Mc_Diffusion: match"); - match = sub2proc + nparts; - rowptr = matrix.rowptr = sub2proc + 2*nparts; - colind = matrix.colind = sub2proc + 3*nparts + 1; - - rcount = imalloc(2*npes+1, "Mc_Diffusion: rcount"); - rdispl = rcount + npes; - - pack = idxmalloc(nvtxs*8, "Mc_Diffusion: pack"); - unpack = pack + nvtxs; - rbuffer = pack + 2*nvtxs; - sbuffer = pack + 3*nvtxs; - map = pack + 4*nvtxs; - rmap = pack + 5*nvtxs; - diff_where = pack + 6*nvtxs; - ehome = pack + 7*nvtxs; - - wsize = amax(sizeof(float)*nparts*6, sizeof(idxtype)*(nvtxs+nparts*2+1)); - workspace = (float *)GKmalloc(wsize, "Mc_Diffusion: workspace"); - degrees = GKmalloc(nedges*sizeof(EdgeType), "Mc_Diffusion: degrees"); - rinfo = graph->rinfo = GKmalloc(nvtxs*sizeof(RInfoType), "Mc_Diffusion: rinfo"); - - /******************************************/ + lbvec = rwspacemalloc(ctrl, ncon); + diff_flows = rwspacemalloc(ctrl, ncon); + sr_flows = rwspacemalloc(ctrl, ncon); + + load = rwspacemalloc(ctrl, nparts); + solution = rwspacemalloc(ctrl, nparts); + npwgts = graph->gnpwgts = rwspacemalloc(ctrl, ncon*nparts); + matrix.values = rwspacemalloc(ctrl, nedges); + transfer = matrix.transfer = rwspacemalloc(ctrl, ncon*nedges); + + proc2sub = iwspacemalloc(ctrl, gk_max(nparts, npes*2)); + sub2proc = iwspacemalloc(ctrl, nparts); + match = iwspacemalloc(ctrl, nparts); + rowptr = matrix.rowptr = iwspacemalloc(ctrl, nparts+1); + colind = matrix.colind = iwspacemalloc(ctrl, nedges); 
+ + rcount = iwspacemalloc(ctrl, npes); + rdispl = iwspacemalloc(ctrl, npes+1); + + pack = iwspacemalloc(ctrl, nvtxs); + unpack = iwspacemalloc(ctrl, nvtxs); + rbuffer = iwspacemalloc(ctrl, nvtxs); + sbuffer = iwspacemalloc(ctrl, nvtxs); + map = iwspacemalloc(ctrl, nvtxs); + rmap = iwspacemalloc(ctrl, nvtxs); + diff_where = iwspacemalloc(ctrl, nvtxs); + ehome = iwspacemalloc(ctrl, nvtxs); + + + wsize = gk_max(sizeof(real_t)*nparts*6, sizeof(idx_t)*(nvtxs+nparts*2+1)); + workspace = (real_t *)gk_malloc(wsize, "Mc_Diffusion: workspace"); + + graph->ckrinfo = (ckrinfo_t *)gk_malloc(nvtxs*sizeof(ckrinfo_t), "Mc_Diffusion: rinfo"); + + /* construct subdomain connectivity graph */ - /******************************************/ matrix.nrows = nparts; - SetUpConnectGraph(graph, &matrix, (idxtype *)workspace); + SetUpConnectGraph(graph, &matrix, (idx_t *)workspace); nlinks = (matrix.nnzs-nparts) / 2; - visited = idxmalloc(matrix.nnzs*2, "visited"); - gvisited = visited + matrix.nnzs; + visited = iwspacemalloc(ctrl, matrix.nnzs); + gvisited = iwspacemalloc(ctrl, matrix.nnzs); for (pass=0; passtpwgts, h); - lbvec[h] = (load[samax(nparts, load)]+1.0/(float)nparts) * (float)nparts; + lbvec[h] = (rmax(nparts, load, 1)+1.0/nparts) * (real_t)nparts; ConjGrad2(&matrix, load, solution, 0.001, workspace); ComputeTransferVector(ncon, &matrix, solution, transfer, h); } - oldlbavg = savg(ncon, lbvec); + oldlbavg = ravg(ncon, lbvec); tnswaps = 0; maxdiff = 0.0; for (i=0; i maxdiff) ? 
maxflow - minflow : maxdiff; } } while (nvisited < nlinks) { - - /******************************************/ /* compute independent sets of subdomains */ - /******************************************/ - idxset(amax(nparts, npes*2), UNMATCHED, proc2sub); + iset(gk_max(nparts, npes*2), UNMATCHED, proc2sub); CSR_Match_SHEM(&matrix, match, proc2sub, gvisited, ncon); - /*****************************/ - /* Set up the packing arrays */ - /*****************************/ - idxset(nparts, UNMATCHED, sub2proc); + /* set up the packing arrays */ + iset(nparts, UNMATCHED, sub2proc); for (i=0; i0; i--) - rdispl[i] = rdispl[i-1]; - rdispl[0] = 0; + SHIFTCSR(i, npes, rdispl); - idxset(nvtxs, UNMATCHED, pack); + iset(nvtxs, UNMATCHED, pack); for (i=0; i 0.0) diff_flows[h] = -1.0 * transfer[j*ncon+h]; + } break; } } nswaps = 1; - scopy(ncon, diff_flows, sr_flows); + rcopy(ncon, diff_flows, sr_flows); - idxset(nvtxs, 0, sbuffer); - for (i=0; invtxs, egraph->where, diff_where); + icopy(egraph->nvtxs, egraph->where, diff_where); for (j=0; jnvtxs; j++) ehome[j] = home[map[j]]; RedoMyLink(ctrl, egraph, ehome, me, you, sr_flows, &sr_cost, &sr_lbavg); if (ncon <= 4) { - sr_where = egraph->where; + sr_where = egraph->where; egraph->where = diff_where; - nswaps = BalanceMyLink(ctrl, egraph, ehome, me, you, diff_flows, maxdiff, &diff_cost, &diff_lbavg, 1.0/(float)nvtxs); + nswaps = BalanceMyLink(ctrl, egraph, ehome, me, you, diff_flows, maxdiff, + &diff_cost, &diff_lbavg, 1.0/(real_t)nvtxs); if ((sr_lbavg < diff_lbavg && - (diff_lbavg >= ubavg-1.0 || sr_cost == diff_cost)) || - (sr_lbavg < ubavg-1.0 && sr_cost < diff_cost)) { + (diff_lbavg >= ubavg-1.0 || sr_cost == diff_cost)) || + (sr_lbavg < ubavg-1.0 && sr_cost < diff_cost)) { for (i=0; invtxs; i++) where[map[i]] = sr_where[i]; } @@ -241,76 +232,56 @@ int Mc_Diffusion(CtrlType *ctrl, GraphType *graph, idxtype *vtxdist, } } else { - for (i=0; invtxs; i++) - where[map[i]] = egraph->where[i]; + for (i=0; invtxs; i++) + where[map[i]] = 
egraph->where[i]; } - GKfree((void **)&egraph->xadj, (void **)&egraph->nvwgt, (void **)&egraph->adjncy, LTERM); - GKfree((void **)&egraph, LTERM); + gk_free((void **)&egraph->xadj, &egraph->nvwgt, &egraph->adjncy, &egraph, LTERM); } - /**********************/ /* Pack the flow data */ - /**********************/ - idxset(nvtxs, UNMATCHED, sbuffer); + iset(nvtxs, UNMATCHED, sbuffer); for (i=0; icomm); - + gkMPI_Allgatherv((void *)&sbuffer[rdispl[mype]], rcount[mype], IDX_T, + (void *)rbuffer, rcount, rdispl, IDX_T, ctrl->comm); - /************************/ /* Unpack the flow data */ - /************************/ for (i=0; icomm); - nvisited = idxsum(matrix.nnzs, gvisited)/2; + gkMPI_Allreduce((void *)visited, (void *)gvisited, matrix.nnzs, + IDX_T, MPI_MAX, ctrl->comm); + nvisited = isum(matrix.nnzs, gvisited, 1)/2; tnswaps += GlobalSESum(ctrl, nswaps); if (iter++ == NGD_PASSES) break; } - /*****************************/ /* perform serial refinement */ - /*****************************/ - Mc_ComputeSerialPartitionParams(graph, nparts, degrees); - Mc_SerialKWayAdaptRefine(graph, nparts, home, ctrl->ubvec, 10); + Mc_ComputeSerialPartitionParams(ctrl, graph, nparts); + Mc_SerialKWayAdaptRefine(ctrl, graph, nparts, home, ctrl->ubvec, 10); - - /****************************/ /* check for early breakout */ - /****************************/ for (h=0; h= oldlbavg || - lbavg <= ubavg + 0.035 - ) + if (tnswaps == 0 || lbavg >= oldlbavg || lbavg <= ubavg + 0.035) done = 1; alldone = GlobalSEMax(ctrl, done); @@ -318,11 +289,9 @@ int Mc_Diffusion(CtrlType *ctrl, GraphType *graph, idxtype *vtxdist, break; } - /*******************************************************/ /* ensure that all subdomains have at least one vertex */ - /*******************************************************/ /* - idxset(nparts, 0, match); + iset(nparts, 0, match); for (i=0; imype == PE) printf("WARNING: empty subdomain %d in Mc_Diffusion\n", me); - you = idxamax(nparts, match); + if (ctrl->mype == PE) 
printf("WARNING: empty subdomain %"PRIDX" in Mc_Diffusion\n", me); + you = iargmax(nparts, match); for (i=0; imype == PE) printf("WARNING: empty subdomain %d in Mc_Diffusion\n", me } */ - /******************************/ /* now free memory and return */ - /******************************/ - GKfree((void **)&load, (void **)&proc2sub, (void **)&sub2proc, (void **)&rcount, LTERM); - GKfree((void **)&pack, (void **)&workspace, (void **)°rees, (void **)&rinfo, LTERM); - GKfree((void **)&visited, LTERM); + gk_free((void **)&workspace, (void **)&graph->ckrinfo, LTERM); graph->gnpwgts = NULL; - graph->rinfo = NULL; + graph->ckrinfo = NULL; + + WCOREPOP; return 0; } @@ -363,25 +330,25 @@ if (ctrl->mype == PE) printf("WARNING: empty subdomain %d in Mc_Diffusion\n", me /************************************************************************* * This function extracts a subgraph from a graph given an indicator array. **************************************************************************/ -GraphType *ExtractGraph(CtrlType *ctrl, GraphType *graph, idxtype *indicator, - idxtype *map, idxtype *rmap) +graph_t *ExtractGraph(ctrl_t *ctrl, graph_t *graph, idx_t *indicator, + idx_t *map, idx_t *rmap) { - int h, i, j; - int nvtxs, envtxs, enedges, ncon; - int vtx, count; - idxtype *xadj, *vsize, *adjncy, *adjwgt, *where; - idxtype *exadj, *evsize, *eadjncy, *eadjwgt, *ewhere; - float *nvwgt, *envwgt; - GraphType *egraph; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - vsize = graph->vsize; + idx_t h, i, j; + idx_t nvtxs, envtxs, enedges, ncon; + idx_t vtx, count; + idx_t *xadj, *vsize, *adjncy, *adjwgt, *where; + idx_t *exadj, *evsize, *eadjncy, *eadjwgt, *ewhere; + real_t *nvwgt, *envwgt; + graph_t *egraph; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + nvwgt = graph->nvwgt; + vsize = graph->vsize; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; + where = graph->where; count = 0; for 
(i=0; invtxs = count; egraph->ncon = graph->ncon; - exadj = egraph->xadj = idxmalloc(envtxs*3+1, "exadj"); + exadj = egraph->xadj = imalloc(envtxs*3+1, "exadj"); ewhere = egraph->where = exadj + envtxs + 1; evsize = egraph->vsize = exadj + 2*envtxs + 1; - envwgt = egraph->nvwgt = fmalloc(envtxs*ncon, "envwgt"); + envwgt = egraph->nvwgt = rmalloc(envtxs*ncon, "envwgt"); /************************************************/ /* compute xadj, where, nvwgt, and vsize arrays */ /************************************************/ - idxset(envtxs+1, 0, exadj); + iset(envtxs+1, 0, exadj); for (i=0; inedges = exadj[envtxs]; - eadjncy = egraph->adjncy = idxmalloc(enedges*2, "eadjncy"); + eadjncy = egraph->adjncy = imalloc(enedges*2, "eadjncy"); eadjwgt = egraph->adjwgt = eadjncy + enedges; for (i=0; i - - -/*************************************************************************/ -/*! This function allocate various pools of memory */ -/*************************************************************************/ -void AllocateWSpace(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) -{ - int nitems = amax(graph->nedges+graph->nvtxs, 1+(graph->nsend+graph->nrecv)/2); - - wspace->nlarge = 2*nitems; - wspace->nparts = ctrl->nparts; - wspace->npes = ctrl->npes; - - wspace->maxcore = 8*nitems+1; - wspace->core = idxmalloc(wspace->maxcore, "AllocateWSpace: wspace->core"); - - wspace->pairs = (KeyValueType *)wspace->core; - wspace->indices = (idxtype *)(wspace->pairs + wspace->nlarge); - wspace->degrees = (EdgeType *)(wspace->indices + wspace->nlarge); - - - wspace->pv1 = idxmalloc(ctrl->nparts+ctrl->npes+1, "AllocateWSpace: wspace->pv1"); - wspace->pv2 = idxmalloc(ctrl->nparts+ctrl->npes+1, "AllocateWSpace: wspace->pv2"); - wspace->pv3 = idxmalloc(ctrl->nparts+ctrl->npes+1, "AllocateWSpace: wspace->pv3"); - wspace->pv4 = idxmalloc(ctrl->nparts+ctrl->npes+1, "AllocateWSpace: wspace->pv4"); - - wspace->pepairs1 = (KeyValueType 
*)GKmalloc(sizeof(KeyValueType)*(ctrl->nparts+ctrl->npes+1), "AllocateWSpace: wspace->pepairs?"); - wspace->pepairs2 = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*(ctrl->nparts+ctrl->npes+1), "AllocateWSpace: wspace->pepairs?"); - -} - -/*************************************************************************/ -/*! This function re-allocates the workspace if previous one is not large - enough */ -/*************************************************************************/ -void AdjustWSpace(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) -{ - int nitems = amax(graph->nedges+graph->nvtxs, 1+(graph->nsend+graph->nrecv)/2); - - if (wspace->nlarge < 2*nitems || - wspace->nparts < ctrl->nparts || - wspace->npes < ctrl->npes) { - FreeWSpace(wspace); - AllocateWSpace(ctrl, graph, wspace); - } -} - -/*************************************************************************/ -/*! This function de-allocate various pools of memory */ -/**************************************************************************/ -void FreeWSpace(WorkSpaceType *wspace) -{ - - GKfree((void **)&wspace->core, - (void **)&wspace->pv1, - (void **)&wspace->pv2, - (void **)&wspace->pv3, - (void **)&wspace->pv4, - (void **)&wspace->pepairs1, - (void **)&wspace->pepairs2, - LTERM); -} - - -/************************************************************************* -* This function de-allocates memory allocated for the control structures -**************************************************************************/ -void FreeCtrl(CtrlType *ctrl) -{ - MPI_Comm_free(&(ctrl->gcomm)); -} - - -/************************************************************************* -* This function creates a CoarseGraphType data structure and initializes -* the various fields -**************************************************************************/ -GraphType *CreateGraph(void) -{ - GraphType *graph; - - graph = (GraphType *)GKmalloc(sizeof(GraphType), "CreateCoarseGraph: graph"); - - InitGraph(graph); - - return 
graph; -} - - -/************************************************************************* -* This function creates a CoarseGraphType data structure and initializes -* the various fields -**************************************************************************/ -void InitGraph(GraphType *graph) -{ - graph->gnvtxs = graph->nvtxs = graph->nedges = graph->nsep = -1; - graph->nnbrs = graph->nrecv = graph->nsend = graph->nlocal = -1; - graph->xadj = graph->vwgt = graph->vsize = graph->adjncy = graph->adjwgt = NULL; - graph->nvwgt = NULL; - graph->vtxdist = NULL; - graph->match = graph->cmap = NULL; - graph->label = NULL; - - graph->peind = NULL; - graph->sendptr = graph->sendind = graph->recvptr = graph->recvind = NULL; - graph->imap = NULL; - graph->pexadj = graph->peadjncy = graph->peadjloc = NULL; - graph->lperm = NULL; - - graph->slens = graph->rlens = NULL; - graph->rcand = NULL; - - graph->where = graph->home = graph->lpwgts = graph->gpwgts = NULL; - graph->lnpwgts = graph->gnpwgts = NULL; - graph->rinfo = NULL; - - graph->nrinfo = NULL; - graph->sepind = NULL; - - graph->coarser = graph->finer = NULL; - -} - -/*************************************************************************/ -/*! This function deallocates any memory stored in a graph */ -/*************************************************************************/ -void FreeGraph(GraphType *graph) -{ - - /* Graph structure fields */ - GKfree((void **)&graph->xadj, - (void **)&graph->vwgt, - (void **)&graph->nvwgt, - (void **)&graph->vsize, - (void **)&graph->adjncy, - (void **)&graph->adjwgt, - (void **)&graph->vtxdist, - (void **)&graph->home, - LTERM); - - FreeNonGraphFields(graph); - - GKfree((void **)&graph, LTERM); -} - - -/*************************************************************************/ -/*! 
This function deallocates the non-graph structure fields of a graph - data structure */ -/*************************************************************************/ -void FreeNonGraphFields(GraphType *graph) -{ - - GKfree( - /* Coarsening fields */ - (void **)&graph->match, - (void **)&graph->cmap, - - /* Initial partitioning fields */ - (void **)&graph->label, - - /* Communication/Setup fields */ - (void **)&graph->peind, - (void **)&graph->sendptr, - (void **)&graph->sendind, - (void **)&graph->recvptr, - (void **)&graph->recvind, - (void **)&graph->imap, - (void **)&graph->pexadj, - (void **)&graph->peadjncy, - (void **)&graph->peadjloc, - (void **)&graph->lperm, - - /* Projection fields */ - (void **)&graph->rlens, - (void **)&graph->slens, - (void **)&graph->rcand, - - /* Refinement fields */ - (void **)&graph->where, - (void **)&graph->lpwgts, - (void **)&graph->gpwgts, - (void **)&graph->lnpwgts, - (void **)&graph->gnpwgts, - (void **)&graph->rinfo, - (void **)&graph->nrinfo, - (void **)&graph->sepind, - - LTERM); -} - - -/*************************************************************************/ -/*! This function deallocates the non-graph and non-setup structure fields - of a graph data structure */ -/*************************************************************************/ -void FreeNonGraphNonSetupFields(GraphType *graph) -{ - - GKfree( - /* Coarsening fields */ - (void **)&graph->match, - (void **)&graph->cmap, - - /* Initial partitioning fields */ - (void **)&graph->label, - - /* Projection fields */ - (void **)&graph->rlens, - (void **)&graph->slens, - (void **)&graph->rcand, - - /* Refinement fields */ - (void **)&graph->where, - (void **)&graph->lpwgts, - (void **)&graph->gpwgts, - (void **)&graph->lnpwgts, - (void **)&graph->gnpwgts, - (void **)&graph->rinfo, - (void **)&graph->nrinfo, - (void **)&graph->sepind, - - LTERM); -} - - -/*************************************************************************/ -/*! 
This function frees any memory allocated for storing the initial graph - and performs the local to global (i.e., original numbering of the - adjacency list) -*/ -/*************************************************************************/ -void FreeInitialGraphAndRemap(GraphType *graph, int wgtflag, int freevsize) -{ - int i, nedges; - idxtype *adjncy, *imap; - - nedges = graph->nedges; - adjncy = graph->adjncy; - imap = graph->imap; - - if (imap != NULL) { - for (i=0; invwgt, &graph->home, LTERM); - - if (freevsize) - GKfree((void **)&graph->vsize, LTERM); - if ((wgtflag&2) == 0) - GKfree((void **)&graph->vwgt, LTERM); - if ((wgtflag&1) == 0) - GKfree((void **)&graph->adjwgt, LTERM); - - GKfree((void **)&graph, LTERM); -} diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mesh.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mesh.c index 078af038..1bcda5f8 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mesh.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mesh.c @@ -9,7 +9,7 @@ * Started 10/19/94 * George * - * $Id: mesh.c,v 1.11 2003/07/25 04:01:04 karypis Exp $ + * $Id: mesh.c 10575 2011-07-14 14:46:42Z karypis $ * */ @@ -19,32 +19,34 @@ /************************************************************************* * This function converts a mesh into a dual graph **************************************************************************/ -void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, - int *numflag, int *ncommonnodes, idxtype **xadj, - idxtype **adjncy, MPI_Comm *comm) +int ParMETIS_V3_Mesh2Dual(idx_t *elmdist, idx_t *eptr, idx_t *eind, + idx_t *numflag, idx_t *ncommon, idx_t **r_xadj, + idx_t **r_adjncy, MPI_Comm *comm) { - int i, j, jj, k, kk, m; - int npes, mype, pe, count, mask, pass; - int nelms, lnns, my_nns, node; - int firstelm, firstnode, lnode, nrecv, nsend; - int *scounts, *rcounts, *sdispl, *rdispl; - 
idxtype *nodedist, *nmap, *auxarray; - idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL; - idxtype *sbuffer, *rbuffer, *htable; - KeyValueType *nodelist, *recvbuffer; - idxtype maxcount, *ind, *wgt; - int gmaxnode, gminnode; - CtrlType ctrl; + idx_t i, j, jj, k, kk, m; + idx_t npes, mype, pe, count, mask, pass; + idx_t nelms, lnns, my_nns, node; + idx_t firstelm, firstnode, lnode, nrecv, nsend; + idx_t *scounts, *rcounts, *sdispl, *rdispl; + idx_t *nodedist, *nmap, *auxarray; + idx_t *gnptr, *gnind, *nptr, *nind, *myxadj=NULL, *myadjncy = NULL; + idx_t *sbuffer, *rbuffer, *htable; + ikv_t *nodelist, *recvbuffer; + idx_t maxcount, *ind, *wgt; + idx_t gmaxnode, gminnode; + size_t curmem; + + gk_malloc_init(); + curmem = gk_GetCurMemoryUsed(); + + /* Get basic comm info */ + gkMPI_Comm_size(*comm, &npes); + gkMPI_Comm_rank(*comm, &mype); - SetUpCtrl(&ctrl, -1, 0, *comm); - - npes = ctrl.npes; - mype = ctrl.mype; - nelms = elmdist[mype+1]-elmdist[mype]; - if (*numflag == 1) + if (*numflag > 0) ChangeNumberingMesh(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1); mask = (1<<11)-1; @@ -52,20 +54,20 @@ void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, /*****************************/ /* Determine number of nodes */ /*****************************/ - gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]); + gminnode = GlobalSEMinComm(*comm, imin(eptr[nelms], eind, 1)); for (i=0; i 0); + ASSERT(nelms > 0); /* construct node distribution array */ - nodedist = idxsmalloc(npes+1, 0, "nodedist"); + nodedist = ismalloc(npes+1, 0, "nodedist"); for (nodedist[0]=0, i=0,j=gmaxnode+1; i nodelist[i-1].key) @@ -101,7 +103,7 @@ void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, } lnns = count; - nmap = idxmalloc(lnns, "nmap"); + nmap = imalloc(lnns, "nmap"); /* renumber the nodes of the elements array */ count = 1; @@ -116,7 +118,7 @@ void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, 
eind[nodelist[i].val] = count-1; nodelist[i].val = auxarray[nodelist[i].val]; /* Store the local element ID */ } - MPI_Barrier(*comm); + gkMPI_Barrier(*comm); /**********************************************************/ /* perform comms necessary to construct node-element list */ @@ -127,9 +129,9 @@ void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, pe++; scounts[pe] += 2; } - ASSERTS(pe < npes); + ASSERT(pe < npes); - MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm); + gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm); icopy(npes, scounts, sdispl); MAKECSR(i, npes, sdispl); @@ -137,30 +139,30 @@ void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, icopy(npes, rcounts, rdispl); MAKECSR(i, npes, rdispl); - ASSERTS(sdispl[npes] == eptr[nelms]*2); + ASSERT(sdispl[npes] == eptr[nelms]*2); nrecv = rdispl[npes]/2; - recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer"); + recvbuffer = ikvmalloc(gk_max(1, nrecv), "recvbuffer"); - MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE, (void *)recvbuffer, - rcounts, rdispl, IDX_DATATYPE, *comm); + gkMPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_T, (void *)recvbuffer, + rcounts, rdispl, IDX_T, *comm); /**************************************/ /* construct global node-element list */ /**************************************/ - gnptr = idxsmalloc(my_nns+1, 0, "gnptr"); + gnptr = ismalloc(my_nns+1, 0, "gnptr"); for (i=0; i= 0 && lnode < my_nns) + ASSERT(lnode >= 0 && lnode < my_nns) gnptr[lnode]++; } } MAKECSR(i, my_nns, gnptr); - gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind"); + gnind = imalloc(gk_max(1, gnptr[my_nns]), "gnind"); for (pe=0; pe= *ncommonnodes) { + if (wgt[j] >= *ncommon) { if (pass == 0) myxadj[i]++; else @@ -328,7 +325,9 @@ void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, if (pass == 0) { MAKECSR(i, nelms, myxadj); - myadjncy = 
*adjncy = idxmalloc(myxadj[nelms], "adjncy"); + myadjncy = *r_adjncy = (idx_t *)malloc(sizeof(idx_t)*myxadj[nelms]); + if (myadjncy == NULL) + gk_errexit(SIGMEM, "Failed to allocate memory for dual graph's adjncy array.\n"); } else { SHIFTCSR(i, nelms, myxadj); @@ -345,12 +344,16 @@ void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, ChangeNumberingMesh(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0); /* do not free nodelist, recvbuffer, rbuffer */ - GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer, - (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr, - (void **)&gnind, (void **)&auxarray, &ind, &wgt, LTERM); + gk_free((void **)&nodedist, &nodelist, &auxarray, &htable, &scounts, &rcounts, + &sdispl, &rdispl, &nmap, &recvbuffer, &gnptr, &gnind, &sbuffer, &rbuffer, + &nptr, &ind, &wgt, LTERM); - FreeCtrl(&ctrl); + if (gk_GetCurMemoryUsed() - curmem > 0) { + printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", + (ssize_t)(gk_GetCurMemoryUsed() - curmem)); + } + gk_malloc_cleanup(0); - return; + return METIS_OK; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mmetis.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mmetis.c index 916ae271..41b53532 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mmetis.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/mmetis.c @@ -8,7 +8,7 @@ * Started 10/19/96 * George * - * $Id: mmetis.c,v 1.8 2003/07/25 04:01:04 karypis Exp $ + * $Id: mmetis.c 10573 2011-07-14 13:31:54Z karypis $ * */ @@ -20,78 +20,69 @@ * This function assumes nothing about the mesh distribution. * It is the general case. 
************************************************************************************/ -void ParMETIS_V3_PartMeshKway(idxtype *elmdist, idxtype *eptr, idxtype *eind, idxtype *elmwgt, - int *wgtflag, int *numflag, int *ncon, int *ncommonnodes, int *nparts, - float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, - MPI_Comm *comm) +int ParMETIS_V3_PartMeshKway(idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *elmwgt, + idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *ncommon, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, + MPI_Comm *comm) { - int i, nvtxs, nedges, gnedges, npes, mype; - idxtype *xadj, *adjncy; - timer TotalTmr, Mesh2DualTmr, ParMETISTmr; - CtrlType ctrl; - - /********************************/ - /* Try and take care bad inputs */ - /********************************/ - if (elmdist == NULL || eptr == NULL || eind == NULL || wgtflag == NULL || - numflag == NULL || ncon == NULL || ncommonnodes == NULL || nparts == NULL || - tpwgts == NULL || ubvec == NULL || options == NULL || edgecut == NULL || - part == NULL || comm == NULL) { - printf("ERROR: One or more required parameters is NULL. Aborting.\n"); - abort(); - } - if (((*wgtflag)&2) && elmwgt == NULL) { - printf("ERROR: elmwgt == NULL when vertex weights were specified. Aborting.\n"); - abort(); - } + idx_t i, status, nvtxs, nedges, gnedges, npes, mype; + idx_t *xadj, *adjncy; + ctrl_t *ctrl; + size_t curmem; + /* Check the input parameters and return if an error */ + status = CheckInputsPartMeshKway(elmdist, eptr, eind, elmwgt, wgtflag, numflag, + ncon, ncommon, nparts, tpwgts, ubvec, options, edgecut, part, comm); + if (GlobalSEMinComm(*comm, status) == 0) + return METIS_ERROR; - SetUpCtrl(&ctrl, *nparts, (options[0] == 1 ? 
options[PMV3_OPTION_DBGLVL] : 0), *comm); + status = METIS_OK; + gk_malloc_init(); + curmem = gk_GetCurMemoryUsed(); - npes = ctrl.npes; - mype = ctrl.mype; + /* Setup the ctrl */ + ctrl = SetupCtrl(PARMETIS_OP_MKMETIS, NULL, 1, 1, NULL, NULL, *comm); + npes = ctrl->npes; + mype = ctrl->mype; - cleartimer(TotalTmr); - cleartimer(Mesh2DualTmr); - cleartimer(ParMETISTmr); - MPI_Barrier(ctrl.comm); - starttimer(TotalTmr); - starttimer(Mesh2DualTmr); + /* Create the dual graph */ + STARTTIMER(ctrl, ctrl->MoveTmr); - ParMETIS_V3_Mesh2Dual(elmdist, eptr, eind, numflag, ncommonnodes, &xadj, &adjncy, - &(ctrl.comm)); + ParMETIS_V3_Mesh2Dual(elmdist, eptr, eind, numflag, ncommon, &xadj, &adjncy, + &(ctrl->comm)); - if (ctrl.dbglvl&DBG_INFO) { + if (ctrl->dbglvl&DBG_INFO) { nvtxs = elmdist[mype+1]-elmdist[mype]; nedges = xadj[nvtxs] + (*numflag == 0 ? 0 : -1); - rprintf(&ctrl, "Completed Dual Graph -- Nvtxs: %d, Nedges: %d \n", - elmdist[npes], GlobalSESum(&ctrl, nedges)); + rprintf(ctrl, "Completed Dual Graph -- Nvtxs: %"PRIDX", Nedges: %"PRIDX" \n", + elmdist[npes], GlobalSESum(ctrl, nedges)); } - MPI_Barrier(ctrl.comm); - stoptimer(Mesh2DualTmr); + STOPTIMER(ctrl, ctrl->MoveTmr); - /***********************/ - /* Partition the graph */ - /***********************/ - starttimer(ParMETISTmr); + /* Partition the dual graph */ + STARTTIMER(ctrl, ctrl->TotalTmr); - ParMETIS_V3_PartKway(elmdist, xadj, adjncy, elmwgt, NULL, wgtflag, numflag, ncon, - nparts, tpwgts, ubvec, options, edgecut, part, &(ctrl.comm)); + status = ParMETIS_V3_PartKway(elmdist, xadj, adjncy, elmwgt, NULL, wgtflag, + numflag, ncon, nparts, tpwgts, ubvec, options, edgecut, part, + &(ctrl->comm)); - MPI_Barrier(ctrl.comm); - stoptimer(ParMETISTmr); - stoptimer(TotalTmr); + STOPTIMER(ctrl, ctrl->TotalTmr); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimer(&ctrl, Mesh2DualTmr, " Mesh2Dual")); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimer(&ctrl, ParMETISTmr, " ParMETIS")); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimer(&ctrl, 
TotalTmr, " Total")); + IFSET(ctrl->dbglvl, DBG_TIME, PrintTimer(ctrl, ctrl->MoveTmr, " Mesh2Dual")); + IFSET(ctrl->dbglvl, DBG_TIME, PrintTimer(ctrl, ctrl->TotalTmr, " ParMETIS")); - GKfree((void **)&xadj, (void **)&adjncy, LTERM); + METIS_Free(xadj); + METIS_Free(adjncy); FreeCtrl(&ctrl); + if (gk_GetCurMemoryUsed() - curmem > 0) { + printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", + (ssize_t)(gk_GetCurMemoryUsed() - curmem)); + } + gk_malloc_cleanup(0); - return; + return (int)status; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/move.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/move.c index d205165d..8db3d7e9 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/move.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/move.c @@ -8,28 +8,31 @@ * Started 11/22/96 * George * - * $Id: move.c,v 1.3 2003/07/31 16:23:30 karypis Exp $ + * $Id: move.c 10657 2011-08-03 14:34:35Z karypis $ * */ #include -/************************************************************************* -* This function moves the graph, and returns a new graph. -* This routine can be called with or without performing refinement. -* In the latter case it allocates and computes lpwgts itself. -**************************************************************************/ -GraphType *Mc_MoveGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +/*************************************************************************/ +/*! This function moves the graph, and returns a new graph. + This routine can be called with or without performing refinement. + In the latter case it allocates and computes lpwgts itself. 
+*/ +/*************************************************************************/ +graph_t *MoveGraph(ctrl_t *ctrl, graph_t *graph) { - int h, i, ii, j, jj, nvtxs, ncon, npes; - idxtype *xadj, *vwgt, *adjncy, *adjwgt, *mvtxdist; - idxtype *where, *newlabel, *lpwgts, *gpwgts; - idxtype *sgraph, *rgraph; - KeyValueType *sinfo, *rinfo; - GraphType *mgraph; + idx_t h, i, ii, j, jj, nvtxs, ncon, npes, nsnbrs, nrnbrs; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *mvtxdist; + idx_t *where, *newlabel, *lpwgts, *gpwgts; + idx_t *sgraph, *rgraph; + ikv_t *sinfo, *rinfo; + graph_t *mgraph; + + WCOREPUSH; /* this routine only works when nparts <= npes */ - ASSERT(ctrl, ctrl->nparts <= ctrl->npes); + PASSERT(ctrl, ctrl->nparts <= ctrl->npes); npes = ctrl->npes; @@ -41,13 +44,14 @@ GraphType *Mc_MoveGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) adjwgt = graph->adjwgt; where = graph->where; - mvtxdist = idxmalloc(npes+1, "MoveGraph: mvtxdist"); + mvtxdist = imalloc(npes+1, "MoveGraph: mvtxdist"); /* Let's do a prefix scan to determine the labeling of the nodes given */ - lpwgts = wspace->pv1; - gpwgts = wspace->pv2; - sinfo = wspace->pepairs1; - rinfo = wspace->pepairs2; + lpwgts = iwspacemalloc(ctrl, npes+1); + gpwgts = iwspacemalloc(ctrl, npes+1); + sinfo = ikvwspacemalloc(ctrl, npes); + rinfo = ikvwspacemalloc(ctrl, npes); + for (i=0; icomm); - MPI_Allreduce((void *)lpwgts, (void *)mvtxdist, npes, IDX_DATATYPE, MPI_SUM, ctrl->comm); - + gkMPI_Scan((void *)lpwgts, (void *)gpwgts, npes, IDX_T, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lpwgts, (void *)mvtxdist, npes, IDX_T, MPI_SUM, ctrl->comm); MAKECSR(i, npes, mvtxdist); - /* gpwgts[i] will store the label of the first vertex for each domain in each processor */ - for (i=0; inrecv, "MoveGraph: newlabel"); - + newlabel = iwspacemalloc(ctrl, nvtxs+graph->nrecv); for (i=0; iindices, newlabel+nvtxs); + CommInterfaceData(ctrl, graph, newlabel, newlabel+nvtxs); /* Now lets tell everybody what and from where he will 
get it. */ - MPI_Alltoall((void *)sinfo, 2, IDX_DATATYPE, (void *)rinfo, 2, IDX_DATATYPE, ctrl->comm); + gkMPI_Alltoall((void *)sinfo, 2, IDX_T, (void *)rinfo, 2, IDX_T, ctrl->comm); /* Use lpwgts and gpwgts as pointers to where data will be received and send */ lpwgts[0] = 0; /* Send part */ gpwgts[0] = 0; /* Received part */ - for (i=0; i 0) + nrnbrs++; + if (sinfo[i].key > 0) + nsnbrs++; } - if (lpwgts[npes]+gpwgts[npes] > wspace->maxcore) { - /* Adjust core memory, incase the graph was originally very memory unbalanced */ - GKfree((void **)&wspace->core, LTERM); - wspace->maxcore = lpwgts[npes]+4*gpwgts[npes]; /* In spirit of the 8*nedges */ - wspace->core = idxmalloc(wspace->maxcore, "Mc_MoveGraph: wspace->core"); - } + /* Update the max # of sreq/rreq/statuses */ + CommUpdateNnbrs(ctrl, gk_max(nsnbrs, nrnbrs)); - sgraph = wspace->core; - rgraph = wspace->core + lpwgts[npes]; + rgraph = iwspacemalloc(ctrl, gpwgts[npes]); + WCOREPUSH; /* for freeing the send part early */ + sgraph = iwspacemalloc(ctrl, lpwgts[npes]); /* Issue the receives first */ - for (i=0; i 0) - MPI_Irecv((void *)(rgraph+gpwgts[i]), gpwgts[i+1]-gpwgts[i], IDX_DATATYPE, - i, 1, ctrl->comm, ctrl->rreq+i); - else { - ASSERT(ctrl, gpwgts[i+1]-gpwgts[i] == 0); - } + gkMPI_Irecv((void *)(rgraph+gpwgts[i]), gpwgts[i+1]-gpwgts[i], IDX_T, + i, 1, ctrl->comm, ctrl->rreq+j++); + else + PASSERT(ctrl, gpwgts[i+1]-gpwgts[i] == 0); } /* Assemble the graph to be sent and send it */ for (i=0; i= 0 && where[i] < npes); + PASSERT(ctrl, where[i] >= 0 && where[i] < npes); ii = lpwgts[where[i]]; sgraph[ii++] = xadj[i+1]-xadj[i]; for (h=0; h 0) - MPI_Isend((void *)(sgraph+lpwgts[i]), lpwgts[i+1]-lpwgts[i], IDX_DATATYPE, - i, 1, ctrl->comm, ctrl->sreq+i); - else { - ASSERT(ctrl, lpwgts[i+1]-lpwgts[i] == 0); - } + gkMPI_Isend((void *)(sgraph+lpwgts[i]), lpwgts[i+1]-lpwgts[i], IDX_T, + i, 1, ctrl->comm, ctrl->sreq+j++); + else + PASSERT(ctrl, lpwgts[i+1]-lpwgts[i] == 0); } /* Wait for the send/recv to finish */ - 
for (i=0; i 0) - MPI_Wait(ctrl->sreq+i, &ctrl->status); - } - for (i=0; i 0) - MPI_Wait(ctrl->rreq+i, &ctrl->status); - } + gkMPI_Waitall(nrnbrs, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(nsnbrs, ctrl->sreq, ctrl->statuses); + + WCOREPOP; /* frees sgraph */ - /* OK, now go and put the graph into GraphType Format */ + /* OK, now go and put the graph into graph_t Format */ mgraph = CreateGraph(); mgraph->vtxdist = mvtxdist; @@ -155,28 +153,27 @@ GraphType *Mc_MoveGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) mgraph->nedges += rinfo[i].val; } nvtxs = mgraph->nvtxs; - xadj = mgraph->xadj = idxmalloc(nvtxs+1, "MMG: mgraph->xadj"); - vwgt = mgraph->vwgt = idxmalloc(nvtxs*ncon, "MMG: mgraph->vwgt"); - adjncy = mgraph->adjncy = idxmalloc(mgraph->nedges, "MMG: mgraph->adjncy"); - adjwgt = mgraph->adjwgt = idxmalloc(mgraph->nedges, "MMG: mgraph->adjwgt"); + xadj = mgraph->xadj = imalloc(nvtxs+1, "MMG: mgraph->xadj"); + vwgt = mgraph->vwgt = imalloc(nvtxs*ncon, "MMG: mgraph->vwgt"); + adjncy = mgraph->adjncy = imalloc(mgraph->nedges, "MMG: mgraph->adjncy"); + adjwgt = mgraph->adjwgt = imalloc(mgraph->nedges, "MMG: mgraph->adjwgt"); for (jj=ii=i=0; inedges); - ASSERT(ctrl, ii == gpwgts[npes]); - ASSERTP(ctrl, jj == mgraph->nedges, (ctrl, "%d %d\n", jj, mgraph->nedges)); - ASSERTP(ctrl, ii == gpwgts[npes], (ctrl, "%d %d %d %d %d\n", ii, gpwgts[npes], jj, mgraph->nedges, nvtxs)); - - GKfree((void **)&newlabel, LTERM); + PASSERT(ctrl, jj == mgraph->nedges); + PASSERT(ctrl, ii == gpwgts[npes]); + PASSERTP(ctrl, jj == mgraph->nedges, (ctrl, "%"PRIDX" %"PRIDX"\n", jj, mgraph->nedges)); + PASSERTP(ctrl, ii == gpwgts[npes], (ctrl, "%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", + ii, gpwgts[npes], jj, mgraph->nedges, nvtxs)); #ifdef DEBUG IFSET(ctrl->dbglvl, DBG_INFO, rprintf(ctrl, "Checking moved graph...\n")); @@ -184,6 +181,8 @@ GraphType *Mc_MoveGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) IFSET(ctrl->dbglvl, DBG_INFO, rprintf(ctrl, "Moved 
graph is consistent.\n")); #endif + WCOREPOP; + return mgraph; } @@ -196,62 +195,61 @@ GraphType *Mc_MoveGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) * and it is used to get the inverse mapping information. * The routine assumes that graph->where corresponds to a npes-way partition. **************************************************************************/ -void ProjectInfoBack(CtrlType *ctrl, GraphType *graph, idxtype *info, idxtype *minfo, - WorkSpaceType *wspace) +void ProjectInfoBack(ctrl_t *ctrl, graph_t *graph, idx_t *info, idx_t *minfo) { - int i, nvtxs, nparts; - idxtype *where, *auxinfo, *sinfo, *rinfo; + idx_t i, nvtxs, nparts, nrecvs, nsends; + idx_t *where, *auxinfo, *sinfo, *rinfo; + + WCOREPUSH; nparts = ctrl->npes; nvtxs = graph->nvtxs; where = graph->where; - sinfo = wspace->pv1; - rinfo = wspace->pv2; + sinfo = iwspacemalloc(ctrl, nparts+1); + rinfo = iwspacemalloc(ctrl, nparts+1); /* Find out in rinfo how many entries are received per partition */ - idxset(nparts, 0, rinfo); + iset(nparts, 0, rinfo); for (i=0; icomm); + gkMPI_Alltoall((void *)rinfo, 1, IDX_T, (void *)sinfo, 1, IDX_T, ctrl->comm); MAKECSR(i, nparts, sinfo); MAKECSR(i, nparts, rinfo); /* allocate memory for auxinfo */ - auxinfo = idxmalloc(rinfo[nparts], "ProjectInfoBack: auxinfo"); + auxinfo = iwspacemalloc(ctrl, rinfo[nparts]); /*----------------------------------------------------------------- * Now, go and send back the minfo -----------------------------------------------------------------*/ - for (i=0; i 0) - MPI_Irecv((void *)(auxinfo+rinfo[i]), rinfo[i+1]-rinfo[i], IDX_DATATYPE, i, 1, ctrl->comm, ctrl->rreq+i); + gkMPI_Irecv((void *)(auxinfo+rinfo[i]), rinfo[i+1]-rinfo[i], IDX_T, + i, 1, ctrl->comm, ctrl->rreq+nrecvs++); } - for (i=0; i 0) - MPI_Isend((void *)(minfo+sinfo[i]), sinfo[i+1]-sinfo[i], IDX_DATATYPE, i, 1, ctrl->comm, ctrl->sreq+i); + for (nsends=0, i=0; i 0) + gkMPI_Isend((void *)(minfo+sinfo[i]), sinfo[i+1]-sinfo[i], IDX_T, + i, 1, ctrl->comm, 
ctrl->sreq+nsends++); } + PASSERT(ctrl, nrecvs <= ctrl->ncommpes); + PASSERT(ctrl, nsends <= ctrl->ncommpes); /* Wait for the send/recv to finish */ - for (i=0; i 0) - MPI_Wait(ctrl->rreq+i, &ctrl->status); - } - for (i=0; i 0) - MPI_Wait(ctrl->sreq+i, &ctrl->status); - } + gkMPI_Waitall(nrecvs, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(nsends, ctrl->sreq, ctrl->statuses); /* Scatter the info received in auxinfo back to info. */ for (i=0; inparts; - nvtxs = graph->nvtxs; - xadj = graph->xadj; + nvtxs = graph->nvtxs; + xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; + where = graph->where; - mvtxdist = idxmalloc(nparts+1, "MoveGraph: mvtxdist"); + mvtxdist = iwspacemalloc(ctrl, nparts+1); + lpwgts = iwspacemalloc(ctrl, nparts+1); + gpwgts = iwspacemalloc(ctrl, nparts+1); - /* Let's do a prefix scan to determine the labeling of the nodes given */ - lpwgts = wspace->pv1; - gpwgts = wspace->pv2; - /* Here we care about the count and not total weight (diff since graph may be weighted */ - idxset(nparts, 0, lpwgts); + /* Here we care about the count and not total weight (diff since graph may + be weighted */ + iset(nparts, 0, lpwgts); for (i=0; icomm); - MPI_Allreduce((void *)lpwgts, (void *)mvtxdist, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); + /* Let's do a prefix scan to determine the labeling of the nodes given */ + gkMPI_Scan((void *)lpwgts, (void *)gpwgts, nparts, IDX_T, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lpwgts, (void *)mvtxdist, nparts, IDX_T, MPI_SUM, ctrl->comm); MAKECSR(i, nparts, mvtxdist); @@ -296,19 +297,17 @@ void FindVtxPerm(CtrlType *ctrl, GraphType *graph, idxtype *perm, WorkSpaceType for (i=0; invtxs; xadj = graph->xadj; @@ -321,7 +320,7 @@ void CheckMGraph(CtrlType *ctrl, GraphType *graph) for (i=0; i= firstvtx && adjncy[j] < lastvtx) { k = adjncy[j]-firstvtx; @@ -330,7 +329,7 @@ void CheckMGraph(CtrlType *ctrl, GraphType *graph) break; } if (jj == xadj[k+1]) - myprintf(ctrl, "(%d %d) but 
not (%d %d) [%d %d] [%d %d]\n", + myprintf(ctrl, "(%"PRIDX" %"PRIDX") but not (%"PRIDX" %"PRIDX") [%"PRIDX" %"PRIDX"] [%"PRIDX" %"PRIDX"]\n", i, k, k, i, firstvtx+i, firstvtx+k, xadj[i+1]-xadj[i], xadj[k+1]-xadj[k]); } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/msetup.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/msetup.c index e3f6fff5..d668cb14 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/msetup.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/msetup.c @@ -8,7 +8,7 @@ * Started 10/19/96 * George * - * $Id: msetup.c,v 1.3 2003/07/31 06:14:01 karypis Exp $ + * $Id: msetup.c 10057 2011-06-02 13:44:44Z karypis $ * */ @@ -17,18 +17,18 @@ /************************************************************************* -* This function setsup the CtrlType structure +* This function setsup the ctrl_t structure **************************************************************************/ -MeshType *SetUpMesh(int *etype, int *ncon, idxtype *elmdist, idxtype *elements, - idxtype *elmwgt, int *wgtflag, MPI_Comm *comm) +mesh_t *SetUpMesh(idx_t *etype, idx_t *ncon, idx_t *elmdist, idx_t *elements, + idx_t *elmwgt, idx_t *wgtflag, MPI_Comm *comm) { - MeshType *mesh; - int i, npes, mype; - int esizes[5] = {-1, 3, 4, 8, 4}; - int maxnode, gmaxnode, minnode, gminnode; + mesh_t *mesh; + idx_t i, npes, mype; + idx_t esizes[5] = {-1, 3, 4, 8, 4}; + idx_t maxnode, gmaxnode, minnode, gminnode; - MPI_Comm_size(*comm, &npes); - MPI_Comm_rank(*comm, &mype); + gkMPI_Comm_size(*comm, &npes); + gkMPI_Comm_rank(*comm, &mype); mesh = CreateMesh(); mesh->elmdist = elmdist; @@ -41,31 +41,31 @@ MeshType *SetUpMesh(int *etype, int *ncon, idxtype *elmdist, idxtype *elements, mesh->esize = esizes[*etype]; if (((*wgtflag)&1) == 0) { - mesh->elmwgt = idxsmalloc(mesh->nelms*mesh->ncon, 1, "SetUpMesh: elmwgt"); + mesh->elmwgt = ismalloc(mesh->nelms*mesh->ncon, 1, "SetUpMesh: 
elmwgt"); } - minnode = elements[idxamin(mesh->nelms*mesh->esize, elements)]; - MPI_Allreduce((void *)&minnode, (void *)&gminnode, 1, MPI_INT, MPI_MIN, *comm); + minnode = imin(mesh->nelms*mesh->esize, elements, 1); + gkMPI_Allreduce((void *)&minnode, (void *)&gminnode, 1, IDX_T, MPI_MIN, *comm); for (i=0; inelms*mesh->esize; i++) elements[i] -= gminnode; mesh->gminnode = gminnode; - maxnode = elements[idxamax(mesh->nelms*mesh->esize, elements)]; - MPI_Allreduce((void *)&maxnode, (void *)&gmaxnode, 1, MPI_INT, MPI_MAX, *comm); + maxnode = imax(mesh->nelms*mesh->esize, elements, 1); + gkMPI_Allreduce((void *)&maxnode, (void *)&gmaxnode, 1, IDX_T, MPI_MAX, *comm); mesh->gnns = gmaxnode+1; return mesh; } /************************************************************************* -* This function creates a MeshType data structure and initializes +* This function creates a mesh_t data structure and initializes * the various fields **************************************************************************/ -MeshType *CreateMesh(void) +mesh_t *CreateMesh(void) { - MeshType *mesh; + mesh_t *mesh; - mesh = (MeshType *)GKmalloc(sizeof(MeshType), "CreateMesh: mesh"); + mesh = (mesh_t *)gk_malloc(sizeof(mesh_t), "CreateMesh: mesh"); InitMesh(mesh); @@ -73,9 +73,9 @@ MeshType *CreateMesh(void) } /************************************************************************* -* This function initializes the various fields of a MeshType. +* This function initializes the various fields of a mesh_t. 
**************************************************************************/ -void InitMesh(MeshType *mesh) +void InitMesh(mesh_t *mesh) { mesh->etype = -1; diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/node_refine.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/node_refine.c index 59b05fc7..13c1227b 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/node_refine.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/node_refine.c @@ -8,7 +8,7 @@ * Started 3/1/96 * George * - * $Id: node_refine.c,v 1.2 2003/07/21 17:18:50 karypis Exp $ + * $Id: node_refine.c 10391 2011-06-23 19:00:08Z karypis $ */ #include @@ -22,10 +22,10 @@ */ /************************************************************************************/ -void AllocateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void AllocateNodePartitionParams(ctrl_t *ctrl, graph_t *graph) { - int nparts, nvtxs; - idxtype *vwgt; + idx_t nparts, nvtxs; + idx_t *vwgt; NRInfoType *rinfo, *myrinfo; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr)); @@ -33,17 +33,18 @@ void AllocateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType nvtxs = graph->nvtxs; nparts = ctrl->nparts; - graph->nrinfo = (NRInfoType *)GKmalloc(sizeof(NRInfoType)*nvtxs, "AllocateNodePartitionParams: rinfo"); - graph->lpwgts = idxmalloc(2*nparts, "AllocateNodePartitionParams: lpwgts"); - graph->gpwgts = idxmalloc(2*nparts, "AllocateNodePartitionParams: gpwgts"); - graph->sepind = idxmalloc(nvtxs, "AllocateNodePartitionParams: sepind"); + graph->nrinfo = (NRInfoType *)gk_malloc(sizeof(NRInfoType)*nvtxs, + "AllocateNodePartitionParams: rinfo"); + graph->lpwgts = imalloc(2*nparts, "AllocateNodePartitionParams: lpwgts"); + graph->gpwgts = imalloc(2*nparts, "AllocateNodePartitionParams: gpwgts"); + graph->sepind = imalloc(nvtxs, "AllocateNodePartitionParams: sepind"); /* Allocate 
additional memory for graph->vwgt in order to store the weights of the remote vertices */ vwgt = graph->vwgt; - graph->vwgt = idxmalloc(nvtxs+graph->nrecv, "AllocateNodePartitionParams: graph->vwgt"); - idxcopy(nvtxs, vwgt, graph->vwgt); - GKfree((void **)&vwgt, LTERM); + graph->vwgt = imalloc(nvtxs+graph->nrecv, "AllocateNodePartitionParams: graph->vwgt"); + icopy(nvtxs, vwgt, graph->vwgt); + gk_free((void **)&vwgt, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr)); } @@ -57,13 +58,13 @@ void AllocateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType */ /************************************************************************************/ -void ComputeNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void ComputeNodePartitionParams(ctrl_t *ctrl, graph_t *graph) { - int i, j, nparts, nvtxs, nsep; - idxtype *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt, *lpwgts, *gpwgts, *sepind; - idxtype *where; + idx_t i, j, nparts, nvtxs, nsep; + idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt, *lpwgts, *gpwgts, *sepind; + idx_t *where; NRInfoType *rinfo, *myrinfo; - int me, other, otherwgt; + idx_t me, other, otherwgt; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr)); @@ -83,11 +84,11 @@ void ComputeNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType sepind = graph->sepind; /* Reset refinement data structures */ - idxset(2*nparts, 0, lpwgts); + iset(2*nparts, 0, lpwgts); /* Send/Receive the where and vwgt information of interface vertices. 
*/ - CommInterfaceData(ctrl, graph, where, wspace->indices, where+nvtxs); - CommInterfaceData(ctrl, graph, vwgt, wspace->indices, vwgt+nvtxs); + CommInterfaceData(ctrl, graph, where, where+nvtxs); + CommInterfaceData(ctrl, graph, vwgt, vwgt+nvtxs); /*------------------------------------------------------------ @@ -95,7 +96,7 @@ void ComputeNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType /------------------------------------------------------------*/ for (nsep=i=0; i= 0 && me < 2*nparts); + PASSERT(ctrl, me >= 0 && me < 2*nparts); lpwgts[me] += vwgt[i]; if (me >= nparts) { /* If it is a separator vertex */ @@ -115,7 +116,7 @@ void ComputeNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType graph->nsep = nsep; /* Finally, sum-up the partition weights */ - MPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_T, MPI_SUM, ctrl->comm); graph->mincut = gpwgts[2*nparts-1]; IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr)); @@ -128,13 +129,13 @@ void ComputeNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType */ /************************************************************************************/ -void UpdateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void UpdateNodePartitionParams(ctrl_t *ctrl, graph_t *graph) { - int i, j, nparts, nvtxs, nsep; - idxtype *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt, *lpwgts, *gpwgts, *sepind; - idxtype *where; + idx_t i, j, nparts, nvtxs, nsep; + idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt, *lpwgts, *gpwgts, *sepind; + idx_t *where; NRInfoType *rinfo, *myrinfo; - int me, other, otherwgt; + idx_t me, other, otherwgt; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr)); @@ -154,10 +155,10 @@ void UpdateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType * sepind = graph->sepind; /* Reset refinement data structures */ - 
idxset(2*nparts, 0, lpwgts); + iset(2*nparts, 0, lpwgts); /* Send/Receive the where and vwgt information of interface vertices. */ - CommInterfaceData(ctrl, graph, where, wspace->indices, where+nvtxs); + CommInterfaceData(ctrl, graph, where, where+nvtxs); /*------------------------------------------------------------ @@ -165,7 +166,7 @@ void UpdateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType * /------------------------------------------------------------*/ for (nsep=i=0; i= 0 && me < 2*nparts); + PASSERT(ctrl, me >= 0 && me < 2*nparts); lpwgts[me] += vwgt[i]; if (me >= nparts) { /* If it is a separator vertex */ @@ -185,7 +186,7 @@ void UpdateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType * graph->nsep = nsep; /* Finally, sum-up the partition weights */ - MPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_T, MPI_SUM, ctrl->comm); graph->mincut = gpwgts[2*nparts-1]; IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr)); @@ -208,28 +209,29 @@ void UpdateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType * 1 => 0 iteration may have a gain. 
*/ /************************************************************************************/ -void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, - int npasses, float ubfrac) +void KWayNodeRefine_Greedy(ctrl_t *ctrl, graph_t *graph, idx_t npasses, real_t ubfrac) { - int i, ii, iii, j, jj, k, pass, nvtxs, nrecv, firstvtx, lastvtx, otherlastvtx, + idx_t i, ii, iii, j, jj, k, pass, nvtxs, nrecv, firstvtx, lastvtx, otherlastvtx, side, c, cc, nmoves, nlupd, nsupd, nnbrs, nchanged, nsep, nzerogainiterations; - int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; - idxtype *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt; - idxtype *where, *lpwgts, *gpwgts, *sepind; - idxtype *peind, *recvptr, *sendptr; - idxtype *update, *supdate, *rupdate, *pe_updates, *marker, *changed; - idxtype *badmaxpwgt; - KeyValueType *swchanges, *rwchanges; - int *nupds_pe; + idx_t npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; + idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt; + idx_t *where, *lpwgts, *gpwgts, *sepind; + idx_t *peind, *recvptr, *sendptr; + idx_t *update, *supdate, *rupdate, *pe_updates, *marker, *changed; + idx_t *badmaxpwgt; + ikv_t *swchanges, *rwchanges; + idx_t *nupds_pe; NRInfoType *rinfo, *myrinfo; - int from, to, me, other, oldcut; - FPQueueType queue; - idxtype *inqueue; - idxtype *rxadj, *radjncy; + idx_t from, to, me, other, oldcut; + rpq_t *queue; + idx_t *inqueue; + idx_t *rxadj, *radjncy; char title[1024]; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); + WCOREPUSH; + nvtxs = graph->nvtxs; nrecv = graph->nrecv; @@ -255,29 +257,26 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa recvptr = graph->recvptr; sendptr = graph->sendptr; - changed = idxmalloc(nvtxs, "KWayRefine: changed"); - rwchanges = wspace->pairs; - swchanges = rwchanges + recvptr[nnbrs]; - - update = idxmalloc(nvtxs, "KWayRefine: update"); - supdate = wspace->indices; - rupdate = supdate + recvptr[nnbrs]; - 
nupds_pe = imalloc(npes, "KWayRefine: nupds_pe"); - - marker = idxsmalloc(nvtxs+nrecv, 0, "KWayRefine: marker"); - - FPQueueInit(&queue, nvtxs); + badmaxpwgt = iwspacemalloc(ctrl, nparts); + nupds_pe = iwspacemalloc(ctrl, npes); + changed = iwspacemalloc(ctrl, nvtxs); + update = iwspacemalloc(ctrl, nvtxs); + marker = iset(nvtxs+nrecv, 0, iwspacemalloc(ctrl, nvtxs+nrecv)); + inqueue = iwspacemalloc(ctrl, nvtxs+nrecv); + rwchanges = ikvwspacemalloc(ctrl, graph->nrecv); + swchanges = ikvwspacemalloc(ctrl, graph->nsend); + supdate = iwspacemalloc(ctrl, graph->nrecv); + rupdate = iwspacemalloc(ctrl, graph->nsend); - inqueue = idxmalloc(nvtxs+nrecv, "KWayRefine:: inqueue"); + queue = rpqCreate(nvtxs); - badmaxpwgt = wspace->pv1; - - for (i=0; i= 0) @@ -285,7 +284,8 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa } } MAKECSR(i, nrecv, rxadj); - radjncy = idxmalloc(rxadj[nrecv], "KWayRefine: radjncy"); + + radjncy = iwspacemalloc(ctrl, rxadj[nrecv]); for (i=0; i= 0) @@ -312,26 +312,26 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa if (where[i] >= nparts) ii++; } - ASSERT(ctrl, ii == nsep); + PASSERT(ctrl, ii == nsep); #endif /* Put the separator nodes in queue */ - FPQueueReset(&queue); - idxset(nvtxs+nrecv, 0, inqueue); + rpqReset(queue); + iset(nvtxs+nrecv, 0, inqueue); for (ii=0; ii= nparts); - FPQueueInsert(&queue, i, vwgt[i] - rinfo[i].edegrees[cc]); + PASSERT(ctrl, inqueue[i] == 0); + PASSERT(ctrl, where[i] >= nparts); + rpqInsert(queue, i, vwgt[i] - rinfo[i].edegrees[cc]); inqueue[i] = 1; } nlupd = nsupd = nmoves = nchanged = nsep = 0; - while ((i = FPQueueGetMax(&queue)) != -1) { + while ((i = rpqGetTop(queue)) != -1) { inqueue[i] = 0; from = where[i]; - ASSERT(ctrl, from >= nparts); + PASSERT(ctrl, from >= nparts); /* It is a one-sided move so it will go to the other partition. 
Look at the comments in InitMultisection to understand the meaning @@ -339,7 +339,6 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa to = from%nparts+c; /* where to move the separator node */ other = from%nparts+cc; /* the other partition involved in the 3-way view */ - /* Go through the loop to see if gain is possible for the separator vertex */ if (gpwgts[to]+vwgt[i] <= badmaxpwgt[to] && vwgt[i] - rinfo[i].edegrees[cc] >= 0) { /* Update the where information of the vertex you moved */ @@ -362,9 +361,9 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa for (jj=xadj[ii]; jj= vwgt[ii]); + PASSERT(ctrl, rinfo[iii].edegrees[cc] >= vwgt[ii]); rinfo[iii].edegrees[cc] -= vwgt[ii]; - FPQueueUpdate(&queue, iii, vwgt[iii]-rinfo[iii].edegrees[cc]); + rpqUpdate(queue, iii, vwgt[iii]-rinfo[iii].edegrees[cc]); } } } @@ -372,9 +371,9 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa for (jj=rxadj[ii-nvtxs]; jj= vwgt[ii]); + PASSERT(ctrl, rinfo[iii].edegrees[cc] >= vwgt[ii]); rinfo[iii].edegrees[cc] -= vwgt[ii]; - FPQueueUpdate(&queue, iii, vwgt[iii]-rinfo[iii].edegrees[cc]); + rpqUpdate(queue, iii, vwgt[iii]-rinfo[iii].edegrees[cc]); } } } @@ -401,11 +400,12 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa } } - /* myprintf(ctrl, "nmoves: %d, nlupd: %d, nsupd: %d\n", nmoves, nlupd, nsupd); */ + /* myprintf(ctrl, "nmoves: %"PRIDX", nlupd: %"PRIDX", nsupd: %"PRIDX"\n", nmoves, nlupd, nsupd); */ - IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%d %d], [%d %d %d]\n", - pass, c, GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), - GlobalSESum(ctrl, nlupd))); + IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, + "\t[%"PRIDX" %"PRIDX"], [%"PRIDX" %"PRIDX" %"PRIDX"]\n", + pass, c, GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), + GlobalSESum(ctrl, nlupd))); /*----------------------------------------------------------------------- @@ -414,8 
+414,8 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa /-----------------------------------------------------------------------*/ /* Issue the receives first */ for (i=0; icomm, ctrl->rreq+i); + gkMPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_T, + peind[i], 1, ctrl->comm, ctrl->rreq+i); } /* Issue the sends next. This needs some preporcessing */ @@ -423,21 +423,21 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa marker[supdate[i]] = 0; supdate[i] = graph->imap[supdate[i]]; } - iidxsort(nsupd, supdate); + isorti(nsupd, supdate); for (j=i=0; icomm, + gkMPI_Isend((void *)(supdate+j), k-j, IDX_T, peind[i], 1, ctrl->comm, ctrl->sreq+i); j = k; } /* OK, now get into the loop waiting for the send/recv operations to finish */ - MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; istatuses+i, IDX_DATATYPE, nupds_pe+i); - MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); + gkMPI_Get_count(ctrl->statuses+i, IDX_T, nupds_pe+i); + gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*------------------------------------------------------------- @@ -471,14 +471,15 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa lpwgts[where[i]] += vwgt[i]; lpwgts[2*nparts-1] += vwgt[i]; - /* myprintf(ctrl, "Vertex %d moves into the separator from %d to %d\n", + /* myprintf(ctrl, "Vertex %"PRIDX" moves into the separator from %"PRIDX" to %"PRIDX"\n", i+firstvtx, me, where[i]); */ } } - /* Tell everybody interested what the new where[] info is for the interface vertices */ - CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, - rwchanges, wspace->pv4); + /* Tell everybody interested what the new where[] info is for the + interface vertices */ + CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, + swchanges, rwchanges); /*------------------------------------------------------------- 
@@ -486,13 +487,13 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa /-------------------------------------------------------------*/ for (ii=0; ii= nparts) { /* If it is a separator vertex */ - /* myprintf(ctrl, "Updating %d %d\n", i+firstvtx, me); */ + /* myprintf(ctrl, "Updating %"PRIDX" %"PRIDX"\n", i+firstvtx, me); */ myrinfo = rinfo+i; myrinfo->edegrees[0] = myrinfo->edegrees[1] = 0; @@ -506,13 +507,13 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa } /* Finally, sum-up the partition weights */ - MPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_DATATYPE, MPI_SUM, + gkMPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_T, MPI_SUM, ctrl->comm); graph->mincut = gpwgts[2*nparts-1]; - sprintf(title, "\tTotalSep [%d]", c); - IFSET(ctrl->dbglvl, DBG_REFINEINFO, PrintNodeBalanceInfo(ctrl, nparts, gpwgts, - badmaxpwgt, title)); + sprintf(title, "\tTotalSep [%"PRIDX"]", c); + IFSET(ctrl->dbglvl, DBG_REFINEINFO, + PrintNodeBalanceInfo(ctrl, nparts, gpwgts, badmaxpwgt, title)); /* break out if there is no improvement in two successive inner iterations that can span successive outer iterations */ @@ -533,9 +534,9 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa break; } - FPQueueFree(&queue); - GKfree((void **)&update, &nupds_pe, &marker, &changed, &inqueue, - &rxadj, &radjncy, LTERM); + rpqDestroy(queue); + + WCOREPOP; IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); } @@ -548,20 +549,19 @@ void KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspa a serial node-based refinement algortihm. 
*/ /************************************************************************************/ -void KWayNodeRefine2Phase(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, - int npasses, float ubfrac) +void KWayNodeRefine2Phase(ctrl_t *ctrl, graph_t *graph, idx_t npasses, real_t ubfrac) { - int i, oldcut; + idx_t i, oldcut; oldcut = graph->mincut+1; for (i=0; imincut == oldcut) break; oldcut = graph->mincut; - KWayNodeRefineInterior(ctrl, graph, wspace, 2, ubfrac); - UpdateNodePartitionParams(ctrl, graph, wspace); + KWayNodeRefineInterior(ctrl, graph, 2, ubfrac); + UpdateNodePartitionParams(ctrl, graph); if (graph->mincut == oldcut) break; oldcut = graph->mincut; @@ -570,25 +570,23 @@ void KWayNodeRefine2Phase(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspac /************************************************************************************/ -/*! - This function performs k-way node-based refinement of the interior nodes of the - graph assigned to each processor using a serial node-refinement algorithm. - -*/ +/*! This function performs k-way node-based refinement of the interior nodes of the + graph assigned to each processor using a serial node-refinement algorithm. 
*/ /************************************************************************************/ -void KWayNodeRefineInterior(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, - int npasses, float ubfrac) +void KWayNodeRefineInterior(ctrl_t *ctrl, graph_t *graph, idx_t npasses, real_t ubfrac) { - int i, j, k, ii, gnnz, gid, qsize; - int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; - idxtype nvtxs, *xadj, *adjncy, *vwgt, *where, *pexadj; - idxtype gnvtxs, *gxadj, *gadjncy, *gvwgt, *gwhere, *ghmarker; - idxtype *gmap, *gimap; - idxtype *pptr, *pind; + idx_t i, j, k, ii, gnnz, gid, qsize; + idx_t npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; + idx_t nvtxs, *xadj, *adjncy, *vwgt, *where, *pexadj; + idx_t gnvtxs, *gxadj, *gadjncy, *gvwgt, *gwhere, *ghmarker; + idx_t *gmap, *gimap; + idx_t *pptr, *pind; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->AuxTmr1)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); + WCOREPUSH; + nvtxs = graph->nvtxs; xadj = graph->xadj; adjncy = graph->adjncy; @@ -596,15 +594,15 @@ void KWayNodeRefineInterior(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wsp where = graph->where; pexadj = graph->pexadj; - gxadj = idxmalloc(nvtxs+1, "KWayNodeRefineInterior: gxadj"); - gvwgt = idxmalloc(nvtxs, "KWayNodeRefineInterior: gvwgt"); - gadjncy = idxmalloc(xadj[nvtxs], "KWayNodeRefineInterior: gadjncy"); - gwhere = idxmalloc(nvtxs, "KWayNodeRefineInterior: gwhere"); - ghmarker = idxmalloc(nvtxs, "KWayNodeRefineInterior: ghmarker"); - gmap = idxsmalloc(nvtxs, -1, "KWayNodeRefineInterior: gmap"); - gimap = idxmalloc(nvtxs, "KWayNodeRefineInterior: gimap"); - pptr = idxsmalloc(2*nparts+1, 0, "KWayNodeRefineInterior: pptr"); - pind = idxmalloc(nvtxs, "KWayNodeRefineInterior: pind"); + gxadj = iwspacemalloc(ctrl, nvtxs+1); + gvwgt = iwspacemalloc(ctrl, nvtxs); + gadjncy = iwspacemalloc(ctrl, xadj[nvtxs]); + gwhere = iwspacemalloc(ctrl, nvtxs); + ghmarker = iwspacemalloc(ctrl, nvtxs); + gmap = iset(nvtxs, -1, 
iwspacemalloc(ctrl, nvtxs)); + gimap = iwspacemalloc(ctrl, nvtxs); + pptr = iset(2*nparts+1, 0, iwspacemalloc(ctrl, 2*nparts+1)); + pind = iwspacemalloc(ctrl, nvtxs); /* Set pptr/pind to contain the vertices in each one of the partitions */ @@ -648,7 +646,7 @@ void KWayNodeRefineInterior(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wsp gvwgt[gnvtxs] = vwgt[i]; gwhere[gnvtxs] = where[i] - gid; ghmarker[gnvtxs] = (pexadj[i+1]-pexadj[i] > 0 ? gwhere[gnvtxs] : -1); - ASSERT(ctrl, gwhere[gnvtxs] >= 0 && gwhere[gnvtxs] <= 1); + PASSERT(ctrl, gwhere[gnvtxs] >= 0 && gwhere[gnvtxs] <= 1); } gxadj[0]=0; gnvtxs=0; gnnz=0; @@ -675,9 +673,9 @@ void KWayNodeRefineInterior(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wsp if (gnnz == 0) continue; - /* graph->adjwgt is used as a dummy argument, as it is not used in node refinement */ - /* The 1.03 is here by choice as it is better to refine using tight constraints */ - METIS_NodeRefine(gnvtxs, gxadj, gvwgt, gadjncy, graph->adjwgt, gwhere, ghmarker, 1.03); + /* The 1.03 is here by choice as it is better to refine using + tight constraints */ + METIS_NodeRefine(gnvtxs, gxadj, gvwgt, gadjncy, gwhere, ghmarker, 1.03); for (i=0; idbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->AuxTmr1)); @@ -696,24 +693,22 @@ void KWayNodeRefineInterior(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wsp /************************************************************************************/ -/*! - This function prints balance information for the parallel k-section refinement - algorithm -*/ +/*! This function prints balance information for the parallel k-section refinement + algorithm. 
*/ /************************************************************************************/ -void PrintNodeBalanceInfo(CtrlType *ctrl, int nparts, idxtype *gpwgts, idxtype *badmaxpwgt, +void PrintNodeBalanceInfo(ctrl_t *ctrl, idx_t nparts, idx_t *gpwgts, idx_t *badmaxpwgt, char *title) { - int i; + idx_t i; if (ctrl->mype == 0) { - printf("%s: %d, ", title, gpwgts[2*nparts-1]); + printf("%s: %"PRIDX", ", title, gpwgts[2*nparts-1]); for (i=0; icomm); + gkMPI_Barrier(ctrl->comm); } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ometis.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ometis.c index 89314c31..01460764 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ometis.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/ometis.c @@ -6,7 +6,7 @@ * * \date Started 8/1/2008 * \author George Karypis - * \version\verbatim $Id: ometis.c 9716 2011-04-06 15:50:54Z karypis $ \endverbatime + * \version\verbatim $Id: ometis.c 10666 2011-08-04 05:22:36Z karypis $ \endverbatime * */ @@ -17,12 +17,19 @@ /*! This function is the entry point of the parallel ordering algorithm. It simply translates the arguments to the tunable version. */ /***********************************************************************************/ -void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, - idxtype *numflag, int *options, idxtype *order, idxtype *sizes, +int ParMETIS_V3_NodeND(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, + idx_t *numflag, idx_t *options, idx_t *order, idx_t *sizes, MPI_Comm *comm) { - int seed = (options != NULL && options[0] != 0 ? options[PMV3_OPTION_SEED] : -1); - int dbglvl = (options != NULL && options[0] != 0 ? options[PMV3_OPTION_DBGLVL] : -1); + idx_t status; + idx_t seed = (options != NULL && options[0] != 0 ? options[PMV3_OPTION_SEED] : -1); + idx_t dbglvl = (options != NULL && options[0] != 0 ? 
options[PMV3_OPTION_DBGLVL] : -1); + + /* Check the input parameters and return if an error */ + status = CheckInputsNodeND(vtxdist, xadj, adjncy, numflag, options, order, sizes, comm); + if (GlobalSEMinComm(*comm, status) == 0) + return METIS_ERROR; + ParMETIS_V32_NodeND(vtxdist, xadj, adjncy, /*vwgt=*/NULL, @@ -32,31 +39,11 @@ void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, /*p_nseps=*/NULL, /*s_nseps=*/NULL, /*ubfrac=*/NULL, - /*seed=*/(options==NULL | options[0] == 0 ? NULL : &seed), - /*dbglvl=*/(options==NULL | options[0] == 0 ? NULL : &dbglvl), + /*seed=*/(options==NULL || options[0] == 0 ? NULL : &seed), + /*dbglvl=*/(options==NULL || options[0] == 0 ? NULL : &dbglvl), order, sizes, comm); -} - - -/***********************************************************************************/ -/*! This function is the entry point of the parallel ordering algorithm using the - old API */ -/***********************************************************************************/ -void PAROMETIS(idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, - idxtype *adjwgt, idxtype *order, idxtype *sizes, int *options, MPI_Comm comm) -{ - int numflag, newoptions[5]; - - newoptions[0] = 1; - newoptions[PMV3_OPTION_DBGLVL] = options[4]; - newoptions[PMV3_OPTION_SEED] = GLOBAL_SEED; - - numflag = options[3]; - - ParMETIS_V3_NodeND(vtxdist, xadj, adjncy, &numflag, newoptions, order, sizes, &comm); - - options[0] = -1; + return METIS_OK; } @@ -66,84 +53,68 @@ void PAROMETIS(idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, dissection ordering approach. 
*/ /***********************************************************************************/ -void ParMETIS_V32_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *numflag, int *mtype, int *rtype, int *p_nseps, int *s_nseps, - float *ubfrac, int *seed, int *dbglvl, idxtype *order, idxtype *sizes, +int ParMETIS_V32_NodeND(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *numflag, idx_t *mtype, idx_t *rtype, idx_t *p_nseps, idx_t *s_nseps, + real_t *ubfrac, idx_t *seed, idx_t *idbglvl, idx_t *order, idx_t *sizes, MPI_Comm *comm) { - int i, j; - int ltvwgts[MAXNCON]; - int npes, mype, wgtflag; - CtrlType ctrl; - WorkSpaceType wspace; - GraphType *graph, *mgraph; - idxtype *morder; - int minnvtxs, dbglvl_original; - - MPI_Comm_size(*comm, &npes); - MPI_Comm_rank(*comm, &mype); + idx_t i, npes, mype, dbglvl, status, wgtflag=0; + ctrl_t *ctrl; + graph_t *graph, *mgraph; + idx_t *morder; + size_t curmem; + + gkMPI_Comm_size(*comm, &npes); + gkMPI_Comm_rank(*comm, &mype); /* Deal with poor vertex distributions */ - ctrl.comm = *comm; - if (GlobalSEMin(&ctrl, vtxdist[mype+1]-vtxdist[mype]) < 1) { - if (mype == 0) - printf("Error: Poor vertex distribution (processor with no vertices).\n"); - return; + if (GlobalSEMinComm(*comm, vtxdist[mype+1]-vtxdist[mype]) < 1) { + printf("Error: Poor vertex distribution (processor with no vertices).\n"); + return METIS_ERROR; } -#ifdef XXX - /* Increase all weights by one to eliminate potentially zero weight vertices */ - if (vwgt) { - for (i=0; idbglvl = dbglvl; + STARTTIMER(ctrl, ctrl->TotalTmr); + ctrl->dbglvl = 0; /*=======================================================================*/ /*! 
Compute the initial k-way partitioning */ /*=======================================================================*/ - ctrl.nparts = 5*npes; - ctrl.partType = STATIC_PARTITION; - ctrl.tpwgts = fsmalloc(ctrl.nparts, 1.0/(float)(ctrl.nparts), "tpwgts"); - ctrl.ubvec[0] = 1.03; - ctrl.CoarsenTo = amin(vtxdist[npes]+1, 200*amax(npes, ctrl.nparts)); - ctrl.ps_relation = -1; - ctrl.dbglvl = 0; + /* Setup the graph */ + if (*numflag > 0) + ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1); - ctrl.seed = (seed == NULL ? GLOBAL_SEED : *seed); - ctrl.seed = (ctrl.seed == 0 ? mype : ctrl.seed*mype); - ctrl.sync = GlobalSEMax(&ctrl, ctrl.seed); + graph = SetupGraph(ctrl, 1, vtxdist, xadj, NULL, NULL, adjncy, NULL, 0); - wgtflag = 0; - graph = Mc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &wgtflag); + /* Allocate workspace */ + AllocateWSpace(ctrl, 10*graph->nvtxs); - AllocateWSpace(&ctrl, graph, &wspace); - IFSET(dbglvl_original, DBG_TIME, InitTimers(&ctrl)); - IFSET(dbglvl_original, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(dbglvl_original, DBG_TIME, starttimer(ctrl.TotalTmr)); + /* Compute the partitioning */ + ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 200*gk_max(npes, ctrl->nparts)); + if (seed != NULL) + ctrl->seed = (*seed == 0 ? mype : (*seed)*mype); - Mc_Global_Partition(&ctrl, graph, &wspace); + Global_Partition(ctrl, graph); /* Collapse the number of partitions to be from 0..npes-1 */ for (i=0; invtxs; i++) graph->where[i] = graph->where[i]%npes; - ctrl.nparts = npes; + ctrl->nparts = npes; /* Put back the real vertex weights */ if (vwgt) { - GKfree((void **)&graph->vwgt, LTERM); - graph->vwgt = vwgt; + gk_free((void **)&graph->vwgt, LTERM); + graph->vwgt = vwgt; + graph->free_vwgt = 0; wgtflag = 2; } @@ -151,77 +122,61 @@ void ParMETIS_V32_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxty /*=======================================================================*/ /*! 
Move the graph according to the partitioning */ /*=======================================================================*/ - IFSET(dbglvl_original, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(dbglvl_original, DBG_TIME, starttimer(ctrl.MoveTmr)); + STARTTIMER(ctrl, ctrl->MoveTmr); + + mgraph = MoveGraph(ctrl, graph); - mgraph = Mc_MoveGraph(&ctrl, graph, &wspace); + /* compute nvwgts for the moved graph */ + SetupGraph_nvwgts(ctrl, mgraph); - IFSET(dbglvl_original, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(dbglvl_original, DBG_TIME, stoptimer(ctrl.MoveTmr)); + STOPTIMER(ctrl, ctrl->MoveTmr); /*=======================================================================*/ /*! Now compute an ordering of the moved graph */ /*=======================================================================*/ - AdjustWSpace(&ctrl, mgraph, &wspace); - - ctrl.partType = ORDER_PARTITION; - ctrl.mtype = (mtype == NULL ? PARMETIS_MTYPE_GLOBAL : *mtype); - ctrl.rtype = (rtype == NULL ? PARMETIS_SRTYPE_2PHASE : *rtype); - ctrl.p_nseps = (p_nseps == NULL ? 1 : *p_nseps); - ctrl.s_nseps = (s_nseps == NULL ? 1 : *s_nseps); - ctrl.ubfrac = (ubfrac == NULL ? 
ORDER_UNBALANCE_FRACTION : *ubfrac); - ctrl.dbglvl = dbglvl_original; - ctrl.ipart = ISEP_NODE; - ctrl.CoarsenTo = amin(graph->gnvtxs-1, - amax(1500*npes, graph->gnvtxs/(5*NUM_INIT_MSECTIONS*npes))); - - /* compute tvwgts */ - for (j=0; jncon; j++) - ltvwgts[j] = 0; - - for (i=0; invtxs; i++) - for (j=0; jncon; j++) - ltvwgts[j] += mgraph->vwgt[i*mgraph->ncon+j]; - - for (j=0; jncon; j++) - ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]); - - mgraph->nvwgt = fmalloc(mgraph->nvtxs*mgraph->ncon, "mgraph->nvwgt"); - for (i=0; invtxs; i++) - for (j=0; jncon; j++) - mgraph->nvwgt[i*mgraph->ncon+j] = (float)(mgraph->vwgt[i*mgraph->ncon+j]) / (float)(ctrl.tvwgts[j]); - - - morder = idxmalloc(mgraph->nvtxs, "PAROMETIS: morder"); - MultilevelOrder(&ctrl, mgraph, morder, sizes, &wspace); + ctrl->optype = PARMETIS_OP_OMETIS; + ctrl->partType = ORDER_PARTITION; + ctrl->mtype = (mtype == NULL ? PARMETIS_MTYPE_GLOBAL : *mtype); + ctrl->rtype = (rtype == NULL ? PARMETIS_SRTYPE_2PHASE : *rtype); + ctrl->p_nseps = (p_nseps == NULL ? 1 : *p_nseps); + ctrl->s_nseps = (s_nseps == NULL ? 1 : *s_nseps); + ctrl->ubfrac = (ubfrac == NULL ? 
ORDER_UNBALANCE_FRACTION : *ubfrac); + ctrl->dbglvl = dbglvl; + ctrl->ipart = ISEP_NODE; + ctrl->CoarsenTo = gk_min(graph->gnvtxs-1, + gk_max(1500*npes, graph->gnvtxs/(5*NUM_INIT_MSECTIONS*npes))); + + morder = imalloc(mgraph->nvtxs, "ParMETIS_NodeND: morder"); + MultilevelOrder(ctrl, mgraph, morder, sizes); /* Invert the ordering back to the original graph */ - ProjectInfoBack(&ctrl, graph, order, morder, &wspace); + ProjectInfoBack(ctrl, graph, order, morder); - IFSET(dbglvl_original, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(dbglvl_original, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(dbglvl_original, DBG_TIME, PrintTimingInfo(&ctrl)); - IFSET(dbglvl_original, DBG_TIME, MPI_Barrier(ctrl.gcomm)); + STOPTIMER(ctrl, ctrl->TotalTmr); + IFSET(dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); + IFSET(dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); - GKfree((void **)&ctrl.tpwgts, &morder, LTERM); + gk_free((void **)&morder, LTERM); FreeGraph(mgraph); - FreeInitialGraphAndRemap(graph, wgtflag, 1); - FreeWSpace(&wspace); - FreeCtrl(&ctrl); + FreeInitialGraphAndRemap(graph); /* If required, restore the graph numbering */ - if (*numflag == 1) + if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0); -#ifdef XXX - /* Decrease the earlier increased weights */ - if (vwgt) { - for (i=0; i 0) { + printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", + (ssize_t)(gk_GetCurMemoryUsed() - curmem)); } -#endif + gk_malloc_cleanup(0); + return (int)status; } @@ -234,85 +189,84 @@ void ParMETIS_V32_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxty top-level separator is stores at \c sizes[2*nparts-2]. 
*/ /*********************************************************************************/ -void MultilevelOrder(CtrlType *ctrl, GraphType *graph, idxtype *order, idxtype *sizes, - WorkSpaceType *wspace) +void MultilevelOrder(ctrl_t *ctrl, graph_t *graph, idx_t *order, idx_t *sizes) { - int i, nparts, nvtxs, npes; - idxtype *perm, *lastnode, *morder, *porder; - GraphType *mgraph; + idx_t i, nparts, nvtxs, npes; + idx_t *perm, *lastnode, *morder, *porder; + graph_t *mgraph; nvtxs = graph->nvtxs; npes = 1<npes); /* # of nested dissection levels = floor(log_2(npes)) */ - perm = idxmalloc(nvtxs, "MultilevelOrder: perm"); - lastnode = idxsmalloc(4*npes, -1, "MultilevelOrder: lastnode"); + perm = imalloc(nvtxs, "MultilevelOrder: perm"); + lastnode = ismalloc(4*npes, -1, "MultilevelOrder: lastnode"); for (i=0; ignvtxs; - idxset(nvtxs, -1, order); + iset(nvtxs, -1, order); /* This is used as a pointer to the end of the sizes[] array (i.e., >=nparts) that has not yet been filled in so that the separator sizes of the succesive levels will be stored correctly. 
It is used in LabelSeparatos() */ sizes[0] = 2*npes-1; - graph->where = idxsmalloc(nvtxs, 0, "MultilevelOrder: graph->where"); + graph->where = ismalloc(nvtxs, 0, "MultilevelOrder: graph->where"); for (nparts=2; nparts<=npes; nparts*=2) { ctrl->nparts = nparts; - Order_Partition_Multiple(ctrl, graph, wspace); + Order_Partition_Multiple(ctrl, graph); - LabelSeparators(ctrl, graph, lastnode, perm, order, sizes, wspace); + LabelSeparators(ctrl, graph, lastnode, perm, order, sizes); - CompactGraph(ctrl, graph, perm, wspace); + CompactGraph(ctrl, graph, perm); if (ctrl->CoarsenTo < 100*nparts) { ctrl->CoarsenTo = 1.5*ctrl->CoarsenTo; } - ctrl->CoarsenTo = amin(ctrl->CoarsenTo, graph->gnvtxs-1); + ctrl->CoarsenTo = gk_min(ctrl->CoarsenTo, graph->gnvtxs-1); } /*----------------------------------------------------------------- / Move the graph so that each processor gets its partition -----------------------------------------------------------------*/ - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MoveTmr)); - SetUp(ctrl, graph, wspace); - graph->ncon = 1; /* needed for Mc_MoveGraph */ - mgraph = Mc_MoveGraph(ctrl, graph, wspace); + CommSetup(ctrl, graph); + graph->ncon = 1; /* needed for MoveGraph */ + mgraph = MoveGraph(ctrl, graph); /* Fill in the sizes[] array for the local part. 
Just the vtxdist of the mgraph */ for (i=0; ivtxdist[i+1]-mgraph->vtxdist[i]; - porder = idxmalloc(graph->nvtxs, "MultilevelOrder: porder"); - morder = idxmalloc(mgraph->nvtxs, "MultilevelOrder: morder"); + porder = imalloc(graph->nvtxs, "MultilevelOrder: porder"); + morder = imalloc(mgraph->nvtxs, "MultilevelOrder: morder"); - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MoveTmr)); /* Find the local ordering */ if (ctrl->mype < npes) - LocalNDOrder(ctrl, mgraph, morder, lastnode[2*(npes+ctrl->mype)]-mgraph->nvtxs, wspace); + LocalNDOrder(ctrl, mgraph, morder, lastnode[2*(npes+ctrl->mype)]-mgraph->nvtxs); /* Project the ordering back to the before-move graph */ - ProjectInfoBack(ctrl, graph, porder, morder, wspace); + ProjectInfoBack(ctrl, graph, porder, morder); /* Copy the ordering from porder to order using perm */ for (i=0; invtxs; i++) { - ASSERT(ctrl, order[perm[i]] == -1); + PASSERT(ctrl, order[perm[i]] == -1); order[perm[i]] = porder[i]; } FreeGraph(mgraph); - GKfree((void **)&perm, (void **)&lastnode, (void **)&porder, (void **)&morder, LTERM); + gk_free((void **)&perm, (void **)&lastnode, (void **)&porder, (void **)&morder, LTERM); /* PrintVector(ctrl, 2*npes-1, 0, sizes, "SIZES"); */ } @@ -322,13 +276,13 @@ void MultilevelOrder(CtrlType *ctrl, GraphType *graph, idxtype *order, idxtype * /*! This is the top-level driver of the multiple multisection ordering code. 
*/ /***************************************************************************/ -void Order_Partition_Multiple(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void Order_Partition_Multiple(ctrl_t *ctrl, graph_t *graph) { - int i, sid, iter, nvtxs, nparts, nlevels; - idxtype *xadj, *adjncy, *where, *gpwgts, *imap; - idxtype *bestseps, *bestwhere, *origwhere; + idx_t i, sid, iter, nvtxs, nparts, nlevels; + idx_t *xadj, *adjncy, *where, *gpwgts, *imap; + idx_t *bestseps, *bestwhere, *origwhere; - SetUp(ctrl, graph, wspace); + CommSetup(ctrl, graph); nparts = ctrl->nparts; @@ -336,16 +290,16 @@ void Order_Partition_Multiple(CtrlType *ctrl, GraphType *graph, WorkSpaceType *w xadj = graph->xadj; adjncy = graph->adjncy; - bestseps = idxsmalloc(2*nparts, -1, "Order_Partition_Multiple: bestseps"); - bestwhere = idxmalloc(nvtxs+graph->nrecv, "Order_Partition_Multiple: bestwhere"); + bestseps = ismalloc(2*nparts, -1, "Order_Partition_Multiple: bestseps"); + bestwhere = imalloc(nvtxs+graph->nrecv, "Order_Partition_Multiple: bestwhere"); origwhere = graph->where; for (nlevels=-1, iter=0; iterp_nseps; iter++) { - graph->where = idxmalloc(nvtxs, "Order_Partition_Multiple: where"); - idxcopy(nvtxs, origwhere, graph->where); + graph->where = imalloc(nvtxs, "Order_Partition_Multiple: where"); + icopy(nvtxs, origwhere, graph->where); - Order_Partition(ctrl, graph, wspace, &nlevels, 0); + Order_Partition(ctrl, graph, &nlevels, 0); where = graph->where; gpwgts = graph->gpwgts; @@ -368,13 +322,13 @@ void Order_Partition_Multiple(CtrlType *ctrl, GraphType *graph, WorkSpaceType *w } graph->where = bestwhere; - AllocateNodePartitionParams(ctrl, graph, wspace); - ComputeNodePartitionParams(ctrl, graph, wspace); + AllocateNodePartitionParams(ctrl, graph); + ComputeNodePartitionParams(ctrl, graph); for (i=0; igpwgts[nparts+i]); + PASSERT(ctrl, bestseps[nparts+i] == graph->gpwgts[nparts+i]); - GKfree((void **)&bestseps, &origwhere, LTERM); + gk_free((void **)&bestseps, &origwhere, 
LTERM); /* PrintVector(ctrl, 2*nparts-1, 0, bestseps, "bestseps"); */ @@ -384,17 +338,16 @@ void Order_Partition_Multiple(CtrlType *ctrl, GraphType *graph, WorkSpaceType *w /**************************************************************************/ /*! The driver of the multilvelel separator finding algorithm */ /**************************************************************************/ -void Order_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, - int *nlevels, int clevel) +void Order_Partition(ctrl_t *ctrl, graph_t *graph, idx_t *nlevels, idx_t clevel) { - SetUp(ctrl, graph, wspace); + CommSetup(ctrl, graph); graph->ncon = 1; - IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6d %8d %5d %5d][%d][%d]\n", + IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6"PRIDX" %8"PRIDX" %5"PRIDX" %5"PRIDX"][%"PRIDX"][%"PRIDX"]\n", graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo, - GlobalSEMax(ctrl, graph->vwgt[idxamax(graph->nvtxs, graph->vwgt)]))); + GlobalSEMax(ctrl, imax(graph->nvtxs, graph->vwgt, 1)))); if ((*nlevels != -1 && *nlevels == clevel) || (*nlevels == -1 && @@ -404,50 +357,54 @@ void Order_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, *nlevels = clevel; /* Compute the initial npart-way multisection */ - InitMultisection(ctrl, graph, wspace); + InitMultisection(ctrl, graph); if (graph->finer == NULL) { /* Do that only if no-coarsening took place */ - AllocateNodePartitionParams(ctrl, graph, wspace); - ComputeNodePartitionParams(ctrl, graph, wspace); + AllocateNodePartitionParams(ctrl, graph); + ComputeNodePartitionParams(ctrl, graph); switch (ctrl->rtype) { case PARMETIS_SRTYPE_GREEDY: - KWayNodeRefine_Greedy(ctrl, graph, wspace, NGR_PASSES, ctrl->ubfrac); + KWayNodeRefine_Greedy(ctrl, graph, NGR_PASSES, ctrl->ubfrac); break; case PARMETIS_SRTYPE_2PHASE: - KWayNodeRefine2Phase(ctrl, graph, wspace, NGR_PASSES, ctrl->ubfrac); + 
KWayNodeRefine2Phase(ctrl, graph, NGR_PASSES, ctrl->ubfrac); break; default: - errexit("Unknown rtype of %d\n", ctrl->rtype); + errexit("Unknown rtype of %"PRIDX"\n", ctrl->rtype); } } } else { /* Coarsen it and then partition it */ switch (ctrl->mtype) { case PARMETIS_MTYPE_LOCAL: - Match_Local(ctrl, graph, wspace); + Match_Local(ctrl, graph); break; case PARMETIS_MTYPE_GLOBAL: - Match_Global(ctrl, graph, wspace); + Match_Global(ctrl, graph); break; default: - errexit("Unknown mtype of %d\n", ctrl->mtype); + errexit("Unknown mtype of %"PRIDX"\n", ctrl->mtype); } - Order_Partition(ctrl, graph->coarser, wspace, nlevels, clevel+1); + graph_WriteToDisk(ctrl, graph); + + Order_Partition(ctrl, graph->coarser, nlevels, clevel+1); - Mc_ProjectPartition(ctrl, graph, wspace); - AllocateNodePartitionParams(ctrl, graph, wspace); - ComputeNodePartitionParams(ctrl, graph, wspace); + graph_ReadFromDisk(ctrl, graph); + + ProjectPartition(ctrl, graph); + AllocateNodePartitionParams(ctrl, graph); + ComputeNodePartitionParams(ctrl, graph); switch (ctrl->rtype) { case PARMETIS_SRTYPE_GREEDY: - KWayNodeRefine_Greedy(ctrl, graph, wspace, NGR_PASSES, ctrl->ubfrac); + KWayNodeRefine_Greedy(ctrl, graph, NGR_PASSES, ctrl->ubfrac); break; case PARMETIS_SRTYPE_2PHASE: - KWayNodeRefine2Phase(ctrl, graph, wspace, NGR_PASSES, ctrl->ubfrac); + KWayNodeRefine2Phase(ctrl, graph, NGR_PASSES, ctrl->ubfrac); break; default: - errexit("Unknown rtype of %d\n", ctrl->rtype); + errexit("Unknown rtype of %"PRIDX"\n", ctrl->rtype); } } } @@ -459,10 +416,10 @@ void Order_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, It uses the appropriate entry in the lastnode array to select label boundaries and adjusts it for the next level. 
*/ /*********************************************************************************/ -void LabelSeparators(CtrlType *ctrl, GraphType *graph, idxtype *lastnode, idxtype - *perm, idxtype *order, idxtype *sizes, WorkSpaceType *wspace) +void LabelSeparators(ctrl_t *ctrl, graph_t *graph, idx_t *lastnode, idx_t *perm, + idx_t *order, idx_t *sizes) { - int i, nvtxs, nparts, sid; idxtype *where, *lpwgts, *gpwgts, *sizescan; + idx_t i, nvtxs, nparts, sid; idx_t *where, *lpwgts, *gpwgts, *sizescan; nparts = ctrl->nparts; @@ -475,23 +432,23 @@ void LabelSeparators(CtrlType *ctrl, GraphType *graph, idxtype *lastnode, idxtyp if (ctrl->mype == 0) { printf("SepWgts: "); for (i=0; icomm); + gkMPI_Barrier(ctrl->comm); } /* Compute the local size of the separator. This is required in case the graph has vertex weights */ - idxset(2*nparts, 0, lpwgts); + iset(2*nparts, 0, lpwgts); for (i=0; icomm); - MPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); + gkMPI_Scan((void *)lpwgts, (void *)sizescan, 2*nparts, IDX_T, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_T, MPI_SUM, ctrl->comm); #ifdef DEBUG_ORDER PrintVector(ctrl, 2*nparts, 0, lpwgts, "Lpwgts"); @@ -508,10 +465,10 @@ void LabelSeparators(CtrlType *ctrl, GraphType *graph, idxtype *lastnode, idxtyp if (ctrl->mype == 0) { printf("SepSizes: "); for (i=0; icomm); + gkMPI_Barrier(ctrl->comm); } for (i=0; i<2*nparts; i++) @@ -522,21 +479,21 @@ void LabelSeparators(CtrlType *ctrl, GraphType *graph, idxtype *lastnode, idxtyp if (where[i] >= nparts) { sid = where[i]; sizescan[sid]++; - ASSERT(ctrl, order[perm[i]] == -1); + PASSERT(ctrl, order[perm[i]] == -1); order[perm[i]] = lastnode[sid] - sizescan[sid]; - /*myprintf(ctrl, "order[%d] = %d, %d\n", perm[i], order[perm[i]], sid); */ + /*myprintf(ctrl, "order[%"PRIDX"] = %"PRIDX", %"PRIDX"\n", perm[i], order[perm[i]], sid); */ } } /* Update lastnode array */ - idxcopy(2*nparts, lastnode, sizescan); + 
icopy(2*nparts, lastnode, sizescan); for (i=0; inparts; npes = ctrl->npes; @@ -562,7 +520,7 @@ void CompactGraph(CtrlType *ctrl, GraphType *graph, idxtype *perm, where = graph->where; if (graph->cmap == NULL) - graph->cmap = idxmalloc(nvtxs+graph->nrecv, "CompactGraph: cmap"); + graph->cmap = imalloc(nvtxs+graph->nrecv, "CompactGraph: cmap"); cmap = graph->cmap; vtxdist = graph->vtxdist; @@ -571,11 +529,11 @@ void CompactGraph(CtrlType *ctrl, GraphType *graph, idxtype *perm, * Construct the cvtxdist of the contracted graph. Uses the fact * that lpwgts stores the local non separator vertices. **************************************************************/ - cvtxdist = wspace->pv1; - cnvtxs = idxsum(nparts, graph->lpwgts); + cnvtxs = isum(nparts, graph->lpwgts, 1); + cvtxdist = iwspacemalloc(ctrl, npes+1); - MPI_Allgather((void *)&cnvtxs, 1, IDX_DATATYPE, (void *)cvtxdist, 1, IDX_DATATYPE, - ctrl->comm); + gkMPI_Allgather((void *)&cnvtxs, 1, IDX_T, (void *)cvtxdist, 1, IDX_T, + ctrl->comm); MAKECSR(i, npes, cvtxdist); #ifdef DEBUG_ORDER @@ -596,18 +554,18 @@ void CompactGraph(CtrlType *ctrl, GraphType *graph, idxtype *perm, } } - CommInterfaceData(ctrl, graph, cmap, wspace->indices, cmap+nvtxs); + CommInterfaceData(ctrl, graph, cmap, cmap+nvtxs); /************************************************************* * Finally, compact the graph **************************************************************/ - newwhere = idxmalloc(cnvtxs, "CompactGraph: newwhere"); + newwhere = imalloc(cnvtxs, "CompactGraph: newwhere"); cnvtxs = l = 0; for (i=0; i= nparts); + PASSERT(ctrl, where[i] == where[adjncy[j]] || where[adjncy[j]] >= nparts); if (where[i] == where[adjncy[j]]) { adjncy[l] = cmap[adjncy[j]]; adjwgt[l++] = adjwgt[j]; @@ -622,8 +580,8 @@ void CompactGraph(CtrlType *ctrl, GraphType *graph, idxtype *perm, } SHIFTCSR(i, cnvtxs, xadj); - GKfree((void **)&graph->match, (void **)&graph->cmap, (void **)&graph->lperm, - (void **)&graph->where, (void **)&graph->label, (void 
**)&graph->rinfo, + gk_free((void **)&graph->match, (void **)&graph->cmap, (void **)&graph->lperm, + (void **)&graph->where, (void **)&graph->label, (void **)&graph->ckrinfo, (void **)&graph->nrinfo, (void **)&graph->lpwgts, (void **)&graph->gpwgts, (void **)&graph->sepind, (void **)&graph->peind, (void **)&graph->sendptr, (void **)&graph->sendind, @@ -636,22 +594,9 @@ void CompactGraph(CtrlType *ctrl, GraphType *graph, idxtype *perm, graph->nedges = l; graph->gnvtxs = cvtxdist[npes]; graph->where = newwhere; - idxcopy(npes+1, cvtxdist, graph->vtxdist); + icopy(npes+1, cvtxdist, graph->vtxdist); - /* - { - int i, j, k; - int *mylpwgts; - - mylpwgts = idxsmalloc(nparts, 0, "mylpwgts"); - for (i=0; idbglvl, DBG_TIME, starttimer(ctrl->SerialTmr)); + WCOREPUSH; nvtxs = graph->nvtxs; xadj = graph->xadj; @@ -678,35 +624,25 @@ void LocalNDOrder(CtrlType *ctrl, GraphType *graph, idxtype *order, int firstnod /* Relabel the vertices so that they are in local index space */ for (i=0; i=firstvtx && adjncy[j]=firstvtx && adjncy[j]maxcore); - - perm = wspace->core; - iperm = perm + nvtxs + 5; + perm = iwspacemalloc(ctrl, nvtxs+5); + iperm = iwspacemalloc(ctrl, nvtxs+5); - options[0] = 1; - options[1] = 3; /* ctype = SHEM */ - options[2] = 1; /* itype = GGPKL */ - options[3] = 2; /* rtype = sep1sided */ - options[4] = 0; /* dbglvl */ - options[5] = 1; /* oflags = compress */ - options[6] = -1; /* pfactor */ - options[7] = ctrl->s_nseps; /* nseps */ + METIS_SetDefaultOptions(options); + options[METIS_OPTION_NSEPS] = ctrl->s_nseps; - if (graph->vwgt) - METIS_NodeWND(&nvtxs, xadj, adjncy, graph->vwgt, &numflag, options, perm, iperm); - else - METIS_NodeND(&nvtxs, xadj, adjncy, &numflag, options, perm, iperm); + METIS_NodeND(&nvtxs, xadj, adjncy, graph->vwgt, options, perm, iperm); for (i=0; i=0 && iperm[i]=0 && iperm[i]dbglvl, DBG_TIME, stoptimer(ctrl->SerialTmr)); } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/parmetis.h 
b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/parmetis.h index ebb73941..606fce80 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/parmetis.h +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/parmetis.h @@ -15,32 +15,41 @@ #define __parmetis_h__ #include +#include #ifndef _MSC_VER #define __cdecl #endif +#if IDXTYPEWIDTH == 32 + /*#define IDX_T MPI_INT32_T */ + #define IDX_T MPI_INT + #define KEEP_BIT 0x40000000L +#elif IDXTYPEWIDTH == 64 + /*#define IDX_T MPI_INT64_T */ + #define IDX_T MPI_LONG_LONG_INT + #define KEEP_BIT 0x4000000000000000LL +#else + #error "Incorrect user-supplied value fo IDXTYPEWIDTH" +#endif -/************************************************************************* -* Data-structures -**************************************************************************/ -/* Undefine the following #define in order to use short int as the idxtype */ -#define IDXTYPE_INT -/* Indexes are as long as integers for now */ -#ifdef IDXTYPE_INT -typedef int idxtype; +#if REALTYPEWIDTH == 32 + #define REAL_T MPI_FLOAT +#elif REALTYPEWIDTH == 64 + #define REAL_T MPI_DOUBLE #else -typedef short idxtype; + #error "Incorrect user-supplied value fo REALTYPEWIDTH" #endif + /************************************************************************* * Constants **************************************************************************/ -#define PARMETIS_MAJOR_VERSION 3 -#define PARMETIS_MINOR_VERSION 2 -#define PARMETIS_SUBMINOR_VERSION 0 +#define PARMETIS_MAJOR_VERSION 4 +#define PARMETIS_MINOR_VERSION 0 +#define PARMETIS_SUBMINOR_VERSION 3 /************************************************************************* @@ -53,148 +62,78 @@ extern "C" { /*------------------------------------------------------------------- * API Introduced with Release 3.0 (current API) *--------------------------------------------------------------------*/ -void __cdecl ParMETIS_V3_AdaptiveRepart( - idxtype 
*vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *vsize, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, - int *nparts, float *tpwgts, float *ubvec, float *ipc2redist, - int *options, int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_PartGeomKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, float *xyz, - int *ncon, int *nparts, float *tpwgts, float *ubvec, int *options, - int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_PartGeom( - idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_PartKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, int *nparts, - float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, - MPI_Comm *comm); - -void __cdecl ParMETIS_V3_Mesh2Dual( - idxtype *elmdist, idxtype *eptr, idxtype *eind, int *numflag, - int *ncommonnodes, idxtype **xadj, idxtype **adjncy, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_PartMeshKway( - idxtype *elmdist, idxtype *eptr, idxtype *eind, idxtype *elmwgt, - int *wgtflag, int *numflag, int *ncon, int *ncommonnodes, int *nparts, - float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, - MPI_Comm *comm); - -void __cdecl ParMETIS_V3_NodeND( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, - int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm); - -void __cdecl ParMETIS_V3_RefineKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, int *nparts, - float *tpwgts, float *ubvec, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_V32_NodeND( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *numflag, int *mtype, int *rtype, int *p_nseps, int *s_nseps, 
- float *ubfrac, int *seed, int *dbglvl, idxtype *order, - idxtype *sizes, MPI_Comm *comm); - - - -/*------------------------------------------------------------------ -* Backward compatibility routines with Release 2.0 -*-------------------------------------------------------------------*/ -void __cdecl ParMETIS_PartKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, - int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_PartGeomKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *ndims, float *xyz, int *nparts, int *options, - int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_PartGeom( - idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_PartGeomRefine( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, float *xyz, - int *options, int *edgecut, idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_RefineKway( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_RepartLDiffusion( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_RepartGDiffusion( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, - idxtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, - idxtype *part, MPI_Comm *comm); - -void __cdecl ParMETIS_RepartRemap( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, +int __cdecl ParMETIS_V3_PartKway( 
+ idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm); -void __cdecl ParMETIS_RepartMLRemap( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, - int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, +int __cdecl ParMETIS_V3_PartGeomKway( + idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ndims, real_t *xyz, + idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part, MPI_Comm *comm); + +int __cdecl ParMETIS_V3_PartGeom( + idx_t *vtxdist, idx_t *ndims, real_t *xyz, idx_t *part, MPI_Comm *comm); + +int __cdecl ParMETIS_V3_RefineKway( + idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, + idx_t *part, MPI_Comm *comm); + +int __cdecl ParMETIS_V3_AdaptiveRepart( + idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *vsize, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, + idx_t *nparts, real_t *tpwgts, real_t *ubvec, real_t *ipc2redist, + idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm); + +int __cdecl ParMETIS_V3_Mesh2Dual( + idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *numflag, + idx_t *ncommonnodes, idx_t **xadj, idx_t **adjncy, MPI_Comm *comm); + +int __cdecl ParMETIS_V3_PartMeshKway( + idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *elmwgt, + idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *ncommonnodes, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm); -void __cdecl ParMETIS_NodeND( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, - idxtype *order, 
idxtype *sizes, MPI_Comm *comm); - -void __cdecl ParMETIS_SerialNodeND( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, - idxtype *order, idxtype *sizes, MPI_Comm *comm); - +int __cdecl ParMETIS_V3_NodeND( + idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *numflag, + idx_t *options, idx_t *order, idx_t *sizes, MPI_Comm *comm); +int __cdecl ParMETIS_V32_NodeND( + idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *numflag, idx_t *mtype, idx_t *rtype, idx_t *p_nseps, idx_t *s_nseps, + real_t *ubfrac, idx_t *seed, idx_t *dbglvl, idx_t *order, + idx_t *sizes, MPI_Comm *comm); - -/*------------------------------------------------------------------- -* Backward compatibility routines with Release 1.0 -*--------------------------------------------------------------------*/ -void __cdecl PARKMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARGKMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - int ndims, float *xyz, idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARGRMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - int ndims, float *xyz, idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARGMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int ndims, float *xyz, - idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARRMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, - idxtype *adjwgt, idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARUAMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, - idxtype *adjwgt, idxtype *part, int *options, MPI_Comm comm); - -void __cdecl PARDAMETIS( - idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, - idxtype *part, int *options, MPI_Comm comm); +int __cdecl 
ParMETIS_SerialNodeND( + idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *numflag, + idx_t *options, idx_t *order, idx_t *sizes, MPI_Comm *comm); #ifdef __cplusplus } #endif +/*------------------------------------------------------------------------ +* Enum type definitions +*-------------------------------------------------------------------------*/ +/*! Operation type codes */ +typedef enum { + PARMETIS_OP_KMETIS, + PARMETIS_OP_GKMETIS, + PARMETIS_OP_GMETIS, + PARMETIS_OP_RMETIS, + PARMETIS_OP_AMETIS, + PARMETIS_OP_OMETIS, + PARMETIS_OP_M2DUAL, + PARMETIS_OP_MKMETIS +} pmoptype_et; + + /************************************************************************* * Various constants used for the different parameters **************************************************************************/ @@ -220,5 +159,10 @@ void __cdecl PARDAMETIS( #define PARMETIS_DBGLVL_MATCHINFO 16 /* Show info on matching */ #define PARMETIS_DBGLVL_RMOVEINFO 32 /* Show info on communication during folding */ #define PARMETIS_DBGLVL_REMAP 64 /* Determines if remapping will take place */ +#define PARMETIS_DBGLVL_TWOHOP 128 /* Performs a 2-hop matching */ +#define PARMETIS_DBGLVL_DROPEDGES 256 /* Drop edges during coarsening */ +#define PARMETIS_DBGLVL_FAST 512 /* Reduces #trials for various steps */ +#define PARMETIS_DBGLVL_ONDISK 1024 /* Saves non-active graphs to disk */ + #endif diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/parmetislib.h b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/parmetislib.h index 70a09caa..7919f468 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/parmetislib.h +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/parmetislib.h @@ -8,7 +8,7 @@ * Started 8/27/94 * George * - * $Id: parmetislib.h,v 1.2 2003/07/21 17:50:22 karypis Exp $ + * $Id: parmetislib.h 10379 2011-06-22 18:51:24Z benjamin $ */ /* @@ -16,19 +16,17 @@ #define DMALLOC 1 */ -#include - 
-// updated to place in current directory +#include +#include #include -// #include "../parmetis.h" -#ifdef DMALLOC -#include -#endif -#include -#include -#include -#include -#include +#include "gklib_defs.h" +#include "rename.h" +#include "defs.h" +#include "struct.h" +#include "macros.h" +#include "proto.h" + +#define MAXNCON 32 diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/proto.h b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/proto.h index 60a1d518..b85bebcd 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/proto.h +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/proto.h @@ -8,356 +8,340 @@ * Started 10/19/95 * George * - * $Id: proto.h,v 1.11 2003/07/25 13:52:03 karypis Exp $ + * $Id: proto.h 10592 2011-07-16 21:17:53Z karypis $ * */ +/* ctrl.c */ +ctrl_t *SetupCtrl(pmoptype_et optype, idx_t *options, idx_t ncon, idx_t nparts, + real_t *tpwgts, real_t *ubvec, MPI_Comm comm); +void SetupCtrl_invtvwgts(ctrl_t *ctrl, graph_t *graph); +void FreeCtrl(ctrl_t **r_ctrl); + + + /* kmetis.c */ -void Mc_Global_Partition(CtrlType *, GraphType *, WorkSpaceType *); +void Global_Partition(ctrl_t *, graph_t *); /* mmetis.c */ /* gkmetis.c */ /* match.c */ -void Match_Global(CtrlType *, GraphType *, WorkSpaceType *); -void Match_Local(CtrlType *, GraphType *, WorkSpaceType *); -void CreateCoarseGraph_Global(CtrlType *, GraphType *, WorkSpaceType *, int); -void CreateCoarseGraph_Local(CtrlType *, GraphType *, WorkSpaceType *, int); +void Match_Global(ctrl_t *, graph_t *); +void Match_Local(ctrl_t *, graph_t *); +void CreateCoarseGraph_Global(ctrl_t *, graph_t *, idx_t); +void CreateCoarseGraph_Local(ctrl_t *, graph_t *, idx_t); +void DropEdges(ctrl_t *ctrl, graph_t *graph); /* initpart.c */ -void Mc_InitPartition_RB(CtrlType *, GraphType *, WorkSpaceType *); -void Mc_KeepPart(GraphType *, WorkSpaceType *, idxtype *, int); +void InitPartition(ctrl_t *, 
graph_t *); +void KeepPart(ctrl_t *, graph_t *, idx_t *, idx_t); /* kwayrefine.c */ -void Mc_ProjectPartition(CtrlType *, GraphType *, WorkSpaceType *); -void Mc_ComputePartitionParams(CtrlType *, GraphType *, WorkSpaceType *); +void ProjectPartition(ctrl_t *, graph_t *); +void ComputePartitionParams(ctrl_t *, graph_t *); +void KWayFM(ctrl_t *, graph_t *, idx_t); +void KWayBalance(ctrl_t *, graph_t *, idx_t); -/* kwayfm.c */ -void Mc_KWayFM(CtrlType *, GraphType *, WorkSpaceType *, int); - -/* kwaybalance.c */ -void Mc_KWayBalance(CtrlType *, GraphType *, WorkSpaceType *, int); /* remap.c */ -void ParallelReMapGraph(CtrlType *, GraphType *, WorkSpaceType *); -void ParallelTotalVReMap(CtrlType *, idxtype *, idxtype *, WorkSpaceType *, int, int); -int SimilarTpwgts(float *, int, int, int); +void ParallelReMapGraph(ctrl_t *, graph_t *); +void ParallelTotalVReMap(ctrl_t *, idx_t *, idx_t *, idx_t, idx_t); +idx_t SimilarTpwgts(real_t *, idx_t, idx_t, idx_t); /* move.c */ -GraphType *Mc_MoveGraph(CtrlType *, GraphType *, WorkSpaceType *); -/* move.c */ -void CheckMGraph(CtrlType *, GraphType *); -void ProjectInfoBack(CtrlType *, GraphType *, idxtype *, idxtype *, WorkSpaceType *); -void FindVtxPerm(CtrlType *, GraphType *, idxtype *, WorkSpaceType *); - -/* memory.c */ -void AllocateWSpace(CtrlType *, GraphType *, WorkSpaceType *); -void AdjustWSpace(CtrlType *, GraphType *, WorkSpaceType *); -void FreeWSpace(WorkSpaceType *); -void FreeCtrl(CtrlType *); -GraphType *CreateGraph(void); -void InitGraph(GraphType *); -void FreeGraph(GraphType *graph); -void FreeNonGraphFields(GraphType *graph); -void FreeNonGraphNonSetupFields(GraphType *graph); -void FreeInitialGraphAndRemap(GraphType *, int, int); +graph_t *MoveGraph(ctrl_t *, graph_t *); +void CheckMGraph(ctrl_t *, graph_t *); +void ProjectInfoBack(ctrl_t *, graph_t *, idx_t *, idx_t *); +void FindVtxPerm(ctrl_t *, graph_t *, idx_t *); + +/* wspace.c */ +void AllocateWSpace(ctrl_t *ctrl, size_t nwords); +void 
AllocateRefinementWorkSpace(ctrl_t *ctrl, idx_t nbrpoolsize); +void FreeWSpace(ctrl_t *); +void *wspacemalloc(ctrl_t *ctrl, size_t nbytes); +idx_t *iwspacemalloc(ctrl_t *ctrl, size_t n); +real_t *rwspacemalloc(ctrl_t *ctrl, size_t n); +ikv_t *ikvwspacemalloc(ctrl_t *ctrl, size_t n); +rkv_t *rkvwspacemalloc(ctrl_t *ctrl, size_t n); +void cnbrpoolReset(ctrl_t *ctrl); +idx_t cnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs); /* ametis.c */ -void Adaptive_Partition(CtrlType *, GraphType *, WorkSpaceType *); +void Adaptive_Partition(ctrl_t *, graph_t *); /* rmetis.c */ /* wave.c */ -float WavefrontDiffusion(CtrlType *, GraphType *, idxtype *); +real_t WavefrontDiffusion(ctrl_t *, graph_t *, idx_t *); /* balancemylink.c */ -int BalanceMyLink(CtrlType *, GraphType *, idxtype *, int, int, float *, float, float *, float *, float); +idx_t BalanceMyLink(ctrl_t *, graph_t *, idx_t *, idx_t, idx_t, real_t *, + real_t, real_t *, real_t *, real_t); /* redomylink.c */ -void RedoMyLink(CtrlType *, GraphType *, idxtype *, int, int, float *, float *, float *); +void RedoMyLink(ctrl_t *, graph_t *, idx_t *, idx_t, idx_t, real_t *, real_t *, real_t *); /* initbalance.c */ -void Balance_Partition(CtrlType *, GraphType *, WorkSpaceType *); -GraphType *Mc_AssembleAdaptiveGraph(CtrlType *, GraphType *, WorkSpaceType *); +void Balance_Partition(ctrl_t *, graph_t *); +graph_t *AssembleAdaptiveGraph(ctrl_t *, graph_t *); /* mdiffusion.c */ -int Mc_Diffusion(CtrlType *, GraphType *, idxtype *, idxtype *, idxtype *, WorkSpaceType *, int); -GraphType *ExtractGraph(CtrlType *, GraphType *, idxtype *, idxtype *, idxtype *); +idx_t Mc_Diffusion(ctrl_t *, graph_t *, idx_t *, idx_t *, idx_t *, idx_t); +graph_t *ExtractGraph(ctrl_t *, graph_t *, idx_t *, idx_t *, idx_t *); /* diffutil.c */ -void SetUpConnectGraph(GraphType *, MatrixType *, idxtype *); -void Mc_ComputeMoveStatistics(CtrlType *, GraphType *, int *, int *, int *); - int Mc_ComputeSerialTotalV(GraphType *, idxtype *); -void ComputeLoad(GraphType 
*, int, float *, float *, int); -void ConjGrad2(MatrixType *, float *, float *, float, float *); -void mvMult2(MatrixType *, float *, float *); -void ComputeTransferVector(int, MatrixType *, float *, float *, int); -int ComputeSerialEdgeCut(GraphType *); -int ComputeSerialTotalV(GraphType *, idxtype *); +void SetUpConnectGraph(graph_t *, matrix_t *, idx_t *); +void Mc_ComputeMoveStatistics(ctrl_t *, graph_t *, idx_t *, idx_t *, idx_t *); + idx_t Mc_ComputeSerialTotalV(graph_t *, idx_t *); +void ComputeLoad(graph_t *, idx_t, real_t *, real_t *, idx_t); +void ConjGrad2(matrix_t *, real_t *, real_t *, real_t, real_t *); +void mvMult2(matrix_t *, real_t *, real_t *); +void ComputeTransferVector(idx_t, matrix_t *, real_t *, real_t *, idx_t); +idx_t ComputeSerialEdgeCut(graph_t *); +idx_t ComputeSerialTotalV(graph_t *, idx_t *); /* akwayfm.c */ -void Mc_KWayAdaptiveRefine(CtrlType *, GraphType *, WorkSpaceType *, int); +void KWayAdaptiveRefine(ctrl_t *, graph_t *, idx_t); /* selectq.c */ -void Mc_DynamicSelectQueue(int, int, int, int, idxtype *, float *, int *, int *, int, float, float); -int Mc_HashVwgts(int, float *); -int Mc_HashVRank(int, int *); +void Mc_DynamicSelectQueue(ctrl_t *ctrl, idx_t nqueues, idx_t ncon, idx_t subdomain1, + idx_t subdomain2, idx_t *currentq, real_t *flows, idx_t *from, idx_t *qnum, + idx_t minval, real_t avgvwgt, real_t maxdiff); +idx_t Mc_HashVwgts(ctrl_t *ctrl, idx_t ncon, real_t *nvwgt); +idx_t Mc_HashVRank(idx_t ncon, idx_t *vwgt); /* csrmatch.c */ -void CSR_Match_SHEM(MatrixType *, idxtype *, idxtype *, idxtype *, int); +void CSR_Match_SHEM(matrix_t *, idx_t *, idx_t *, idx_t *, idx_t); /* serial.c */ -void Mc_SerialKWayAdaptRefine(GraphType *, int, idxtype *, float *, int); -void Mc_ComputeSerialPartitionParams(GraphType *, int, EdgeType *); -int AreAllHVwgtsBelow(int, float, float *, float, float *, float *); -void ComputeHKWayLoadImbalance(int, int, float *, float *); -void SerialRemap(GraphType *, int, idxtype *, idxtype *, idxtype 
*, float *); +void Mc_ComputeSerialPartitionParams(ctrl_t *ctrl, graph_t *, idx_t); +void Mc_SerialKWayAdaptRefine(ctrl_t *ctrl, graph_t *, idx_t, idx_t *, real_t *, idx_t); +idx_t AreAllHVwgtsBelow(idx_t, real_t, real_t *, real_t, real_t *, real_t *); +void ComputeHKWayLoadImbalance(idx_t, idx_t, real_t *, real_t *); +void SerialRemap(ctrl_t *ctrl, graph_t *, idx_t, idx_t *, idx_t *, idx_t *, real_t *); int SSMIncKeyCmp(const void *, const void *); -void Mc_Serial_FM_2WayRefine(GraphType *, float *, int); -void Serial_SelectQueue(int, float *, float *, int *, int *, FPQueueType [MAXNCON][2]); -int Serial_BetterBalance(int, float *, float *, float *); -float Serial_Compute2WayHLoadImbalance(int, float *, float *); -void Mc_Serial_Balance2Way(GraphType *, float *, float); -void Mc_Serial_Init2WayBalance(GraphType *, float *); -int Serial_SelectQueueOneWay(int, float *, float *, int, FPQueueType [MAXNCON][2]); -void Mc_Serial_Compute2WayPartitionParams(GraphType *); -int Serial_AreAnyVwgtsBelow(int, float, float *, float, float *, float *); +void Mc_Serial_FM_2WayRefine(ctrl_t *ctrl, graph_t *, real_t *, idx_t); +void Serial_SelectQueue(idx_t, real_t *, real_t *, idx_t *, idx_t *, rpq_t **[2]); +idx_t Serial_BetterBalance(idx_t, real_t *, real_t *, real_t *, real_t *); +real_t Serial_Compute2WayHLoadImbalance(idx_t, real_t *, real_t *); +void Mc_Serial_Balance2Way(ctrl_t *ctrl, graph_t *, real_t *, real_t); +void Mc_Serial_Init2WayBalance(ctrl_t *ctrl, graph_t *, real_t *); +idx_t Serial_SelectQueueOneWay(idx_t, real_t *, real_t *, idx_t, rpq_t **[2]); +void Mc_Serial_Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *); +idx_t Serial_AreAnyVwgtsBelow(idx_t, real_t, real_t *, real_t, real_t *, real_t *); /* weird.c */ -void PartitionSmallGraph(CtrlType *, GraphType *, WorkSpaceType *); -void CheckInputs(int partType, int npes, int dbglvl, int *wgtflag, int *iwgtflag, - int *numflag, int *inumflag, int *ncon, int *incon, int *nparts, - int *inparts, float *tpwgts, 
float **itpwgts, float *ubvec, - float *iubvec, float *ipc2redist, float *iipc2redist, int *options, - int *ioptions, idxtype *part, MPI_Comm *comm); +int CheckInputsPartKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, + MPI_Comm *comm); +int CheckInputsPartGeomKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ndims, real_t *xyz, + idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part, MPI_Comm *comm); +int CheckInputsPartGeom(idx_t *vtxdist, idx_t *ndims, real_t *xyz, + idx_t *part, MPI_Comm *comm); +int CheckInputsAdaptiveRepart(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *wgtflag, + idx_t *numflag, idx_t *ncon, idx_t *nparts, real_t *tpwgts, + real_t *ubvec, real_t *ipc2redist, idx_t *options, idx_t *edgecut, + idx_t *part, MPI_Comm *comm); +int CheckInputsNodeND(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, + idx_t *numflag, idx_t *options, idx_t *order, idx_t *sizes, + MPI_Comm *comm); +int CheckInputsPartMeshKway(idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *elmwgt, + idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *ncommon, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, + MPI_Comm *comm); +void PartitionSmallGraph(ctrl_t *, graph_t *); + /* mesh.c */ /* pspases.c */ -GraphType *AssembleEntireGraph(CtrlType *, idxtype *, idxtype *, idxtype *); +graph_t *AssembleEntireGraph(ctrl_t *, idx_t *, idx_t *, idx_t *); /* node_refine.c */ -void AllocateNodePartitionParams(CtrlType *, GraphType *, WorkSpaceType *); -void ComputeNodePartitionParams(CtrlType *, GraphType *, WorkSpaceType *); -void UpdateNodePartitionParams(CtrlType *, GraphType *, WorkSpaceType *); -void 
KWayNodeRefine_Greedy(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, - int npasses, float ubfrac); -void KWayNodeRefine2Phase(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, - int npasses, float ubfrac); -void KWayNodeRefineInterior(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, - int npasses, float ubfrac); -void PrintNodeBalanceInfo(CtrlType *, int, idxtype *, idxtype *, char *); +void AllocateNodePartitionParams(ctrl_t *, graph_t *); +void ComputeNodePartitionParams(ctrl_t *, graph_t *); +void UpdateNodePartitionParams(ctrl_t *, graph_t *); +void KWayNodeRefine_Greedy(ctrl_t *ctrl, graph_t *graph, idx_t npasses, real_t ubfrac); +void KWayNodeRefine2Phase(ctrl_t *ctrl, graph_t *graph, idx_t npasses, real_t ubfrac); +void KWayNodeRefineInterior(ctrl_t *ctrl, graph_t *graph, idx_t npasses, real_t ubfrac); +void PrintNodeBalanceInfo(ctrl_t *, idx_t, idx_t *, idx_t *, char *); /* initmsection.c */ -void InitMultisection(CtrlType *, GraphType *, WorkSpaceType *); -GraphType *AssembleMultisectedGraph(CtrlType *, GraphType *, WorkSpaceType *); +void InitMultisection(ctrl_t *, graph_t *); +graph_t *AssembleMultisectedGraph(ctrl_t *, graph_t *); /* ometis.c */ -void MultilevelOrder(CtrlType *ctrl, GraphType *graph, idxtype *order, idxtype *sizes, - WorkSpaceType *wspace); -void Order_Partition_Multiple(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace); -void Order_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, - int *nlevels, int clevel); -void LabelSeparators(CtrlType *, GraphType *, idxtype *, idxtype *, idxtype *, idxtype *, WorkSpaceType *); -void CompactGraph(CtrlType *, GraphType *, idxtype *, WorkSpaceType *); -void LocalNDOrder(CtrlType *, GraphType *, idxtype *, int, WorkSpaceType *); +void MultilevelOrder(ctrl_t *ctrl, graph_t *graph, idx_t *order, idx_t *sizes); +void Order_Partition_Multiple(ctrl_t *ctrl, graph_t *graph); +void Order_Partition(ctrl_t *ctrl, graph_t *graph, idx_t *nlevels, idx_t clevel); 
+void LabelSeparators(ctrl_t *, graph_t *, idx_t *, idx_t *, idx_t *, idx_t *); +void CompactGraph(ctrl_t *, graph_t *, idx_t *); +void LocalNDOrder(ctrl_t *, graph_t *, idx_t *, idx_t); /* xyzpart.c */ -void Coordinate_Partition(CtrlType *, GraphType *, int, float *, int, WorkSpaceType *); -void PartSort(CtrlType *, GraphType *, KeyValueType *, WorkSpaceType *); - - -/* fpqueue.c */ -void FPQueueInit(FPQueueType *, int); -void FPQueueReset(FPQueueType *); -void FPQueueFree(FPQueueType *); -int FPQueueGetSize(FPQueueType *); -int FPQueueInsert(FPQueueType *, int, float); -int FPQueueDelete(FPQueueType *, int); -int FPQueueUpdate(FPQueueType *, int, float); -int FPQueueGetMax(FPQueueType *); -int FPQueueSeeMaxVtx(FPQueueType *); -float FPQueueSeeMaxGain(FPQueueType *); -float FPQueueGetKey(FPQueueType *); -int FPQueueGetQSize(FPQueueType *); -int CheckHeapFloat(FPQueueType *); +void Coordinate_Partition(ctrl_t *, graph_t *, idx_t, real_t *, idx_t); +void IRBinCoordinates(ctrl_t *ctrl, graph_t *graph, idx_t ndims, real_t *xyz, + idx_t nbins, idx_t *bxyz); +void RBBinCoordinates(ctrl_t *ctrl, graph_t *graph, idx_t ndims, real_t *xyz, + idx_t nbins, idx_t *bxyz); +void SampleSort(ctrl_t *, graph_t *, ikv_t *); +void PseudoSampleSort(ctrl_t *, graph_t *, ikv_t *); + /* stat.c */ -void Mc_ComputeSerialBalance(CtrlType *, GraphType *, idxtype *, float *); -void Mc_ComputeParallelBalance(CtrlType *, GraphType *, idxtype *, float *); -void Mc_PrintThrottleMatrix(CtrlType *, GraphType *, float *); -void Mc_ComputeRefineStats(CtrlType *, GraphType *, float *); +void ComputeSerialBalance(ctrl_t *, graph_t *, idx_t *, real_t *); +void ComputeParallelBalance(ctrl_t *, graph_t *, idx_t *, real_t *); +void Mc_PrintThrottleMatrix(ctrl_t *, graph_t *, real_t *); +void PrintPostPartInfo(ctrl_t *ctrl, graph_t *graph, idx_t movestats); +void ComputeMoveStatistics(ctrl_t *, graph_t *, idx_t *, idx_t *, idx_t *); /* debug.c */ -void PrintVector(CtrlType *, int, int, idxtype *, char *); 
-void PrintVector2(CtrlType *, int, int, idxtype *, char *); -void PrintPairs(CtrlType *, int, KeyValueType *, char *); -void PrintGraph(CtrlType *, GraphType *); -void PrintGraph2(CtrlType *, GraphType *); -void PrintSetUpInfo(CtrlType *ctrl, GraphType *graph); -void PrintTransferedGraphs(CtrlType *, int, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *); -void WriteMetisGraph(int, idxtype *, idxtype *, idxtype *, idxtype *); +void PrintVector(ctrl_t *, idx_t, idx_t, idx_t *, char *); +void PrintVector2(ctrl_t *, idx_t, idx_t, idx_t *, char *); +void PrintPairs(ctrl_t *, idx_t, ikv_t *, char *); +void PrintGraph(ctrl_t *, graph_t *); +void PrintGraph2(ctrl_t *, graph_t *); +void PrintSetUpInfo(ctrl_t *ctrl, graph_t *graph); +void PrintTransferedGraphs(ctrl_t *, idx_t, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *); +void WriteMetisGraph(idx_t, idx_t *, idx_t *, idx_t *, idx_t *); + /* comm.c */ -void CommInterfaceData(CtrlType *, GraphType *, idxtype *, idxtype *, idxtype *); -void CommChangedInterfaceData(CtrlType *, GraphType *, int, idxtype *, idxtype *, KeyValueType *, KeyValueType *, idxtype *); -int GlobalSEMax(CtrlType *, int); -double GlobalSEMaxDouble(CtrlType *, double); -int GlobalSEMin(CtrlType *, int); -int GlobalSESum(CtrlType *, int); -float GlobalSEMaxFloat(CtrlType *, float); -float GlobalSEMinFloat(CtrlType *, float); -float GlobalSESumFloat(CtrlType *, float); +void CommSetup(ctrl_t *, graph_t *); +void CommUpdateNnbrs(ctrl_t *ctrl, idx_t nnbrs); +void CommInterfaceData(ctrl_t *ctrl, graph_t *graph, idx_t *data, idx_t *recvvector); +void CommChangedInterfaceData(ctrl_t *ctrl, graph_t *graph, idx_t nchanged, + idx_t *changed, idx_t *data, ikv_t *sendpairs, ikv_t *recvpairs); +idx_t GlobalSEMax(ctrl_t *, idx_t); +idx_t GlobalSEMaxComm(MPI_Comm comm, idx_t value); +idx_t GlobalSEMin(ctrl_t *, idx_t); +idx_t GlobalSEMinComm(MPI_Comm comm, idx_t value); +idx_t GlobalSESum(ctrl_t *, idx_t); +idx_t GlobalSESumComm(MPI_Comm comm, idx_t value); 
+real_t GlobalSEMaxFloat(ctrl_t *, real_t); +real_t GlobalSEMinFloat(ctrl_t *, real_t); +real_t GlobalSESumFloat(ctrl_t *, real_t); /* util.c */ -void errexit(char *,...); -void myprintf(CtrlType *, char *f_str,...); -void rprintf(CtrlType *, char *f_str,...); -#ifndef DMALLOC -int *imalloc(int, char *); -idxtype *idxmalloc(int, char *); -float *fmalloc(int, char *); -int *ismalloc(int, int, char *); -idxtype *idxsmalloc(int, idxtype, char *); -void *GKmalloc(int, char *); -#endif -void GKfree(void **,...); -int *iset(int n, int val, int *x); -idxtype * idxset(int n, idxtype val, idxtype *x); -int idxamax(int n, idxtype *x); -int idxamin(int n, idxtype *x); -int idxasum(int n, idxtype *x); -float snorm2(int, float *); -float sdot(int n, float *, float *); -void saxpy(int, float, float *, float *); -void ikeyvalsort_org(int, KeyValueType *); -int IncKeyValueCmp(const void *, const void *); -void dkeyvalsort(int, KeyValueType *); -int DecKeyValueCmp(const void *, const void *); -int BSearch(int, idxtype *, int); -void RandomPermute(int, idxtype *, int); -void FastRandomPermute(int, idxtype *, int); -int ispow2(int); -int log2Int(int); -void BucketSortKeysDec(int, int, idxtype *, idxtype *); -float *sset(int n, float val, float *x); -int iamax(int, int *); -int idxamax_strd(int, idxtype *, int); -int idxamin_strd(int, idxtype *, int); -int samax_strd(int, float *, int); -int sfamax(int, float *); -int samin_strd(int, float *, int); -float idxavg(int, idxtype *); -float savg(int, float *); -int samax(int, float *); -int sfavg(int n, float *x); -int samax2(int, float *); -int samin(int, float *); -int idxsum(int, idxtype *); -int idxsum_strd(int, idxtype *, int); -void idxadd(int, idxtype *, idxtype *); -float ssum(int, float *); -float ssum_strd(int, float *, int); -void sscale(int, float, float *); -void saneg(int, float *); -float BetterVBalance(int, float *, float *, float *); -int IsHBalanceBetterTT(int, float *, float *, float *, float *); -int 
IsHBalanceBetterFT(int, float *, float *, float *, float *); -int myvalkeycompare(const void *, const void *); -int imyvalkeycompare(const void *, const void *); -float *fsmalloc(int, float, char *); -void saxpy2(int, float, float *, int, float *, int); -void GetThreeMax(int, float *, int *, int *, int *); - -/* qsort_special.c */ -void iidxsort(int, idxtype *); -void iintsort(int, int *); -void ikeysort(int, KeyValueType *); -void ikeyvalsort(int, KeyValueType *); - -/* grsetup.c */ -GraphType *Mc_SetUpGraph(CtrlType *, int, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, int *); -void SetUpCtrl(CtrlType *ctrl, int, int, MPI_Comm); -void SetUpComm(CtrlType *ctrl, MPI_Comm comm); -void ChangeNumbering(idxtype *, idxtype *, idxtype *, idxtype *, int, int, int); -void ChangeNumberingMesh(idxtype *elmdist, idxtype *eptr, idxtype *eind, - idxtype *xadj, idxtype *adjncy, idxtype *part, - int npes, int mype, int from); -void GraphRandomPermute(GraphType *); -void ComputeMoveStatistics(CtrlType *, GraphType *, int *, int *, int *); +void myprintf(ctrl_t *ctrl, char *f_str,...); +void rprintf(ctrl_t *ctrl, char *f_str,...); +void mypridx_tf(ctrl_t *, char *f_str,...); +void rpridx_tf(ctrl_t *, char *f_str,...); +idx_t BSearch(idx_t, idx_t *, idx_t); +void RandomPermute(idx_t, idx_t *, idx_t); +void FastRandomPermute(idx_t, idx_t *, idx_t); +idx_t ispow2(idx_t); +idx_t log2Int(idx_t); +void BucketSortKeysDec(idx_t, idx_t, idx_t *, idx_t *); +real_t BetterVBalance(idx_t, real_t *, real_t *, real_t *); +idx_t IsHBalanceBetterTT(idx_t, real_t *, real_t *, real_t *, real_t *); +idx_t IsHBalanceBetterFT(idx_t, real_t *, real_t *, real_t *, real_t *); +void GetThreeMax(idx_t, real_t *, idx_t *, idx_t *, idx_t *); +size_t rargmax_strd(size_t n, real_t *x, size_t incx); +size_t rargmin_strd(size_t n, real_t *x, size_t incx); +size_t rargmax2(size_t n, real_t *x); +real_t ravg(size_t n, real_t *x); +real_t rfavg(size_t n, real_t *x); + +/* graph.c */ +graph_t 
*SetupGraph(ctrl_t *ctrl, idx_t ncon, idx_t *vtxdist, idx_t *xadj, + idx_t *vwgt, idx_t *vsize, idx_t *adjncy, idx_t *adjwgt, + idx_t wgtflag); +void SetupGraph_nvwgts(ctrl_t *ctrl, graph_t *graph); +graph_t *CreateGraph(void); +void InitGraph(graph_t *); +void FreeGraph(graph_t *graph); +void FreeNonGraphFields(graph_t *graph); +void FreeNonGraphNonSetupFields(graph_t *graph); +void FreeCommSetupFields(graph_t *graph); +void FreeInitialGraphAndRemap(graph_t *graph); +void graph_WriteToDisk(ctrl_t *ctrl, graph_t *graph); +void graph_ReadFromDisk(ctrl_t *ctrl, graph_t *graph); + +/* renumber.c */ +void ChangeNumbering(idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t, idx_t); +void ChangeNumberingMesh(idx_t *elmdist, idx_t *eptr, idx_t *eind, + idx_t *xadj, idx_t *adjncy, idx_t *part, + idx_t npes, idx_t mype, idx_t from); /* timer.c */ -void InitTimers(CtrlType *); -void PrintTimingInfo(CtrlType *); -void PrintTimer(CtrlType *, timer, char *); - -/* setup.c */ -void SetUp(CtrlType *, GraphType *, WorkSpaceType *); -int Home_PE(int, int, idxtype *, int); - - -/*********************/ -/* METIS subroutines */ -/*********************/ -void METIS_WPartGraphKway2(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void METIS_mCPartGraphRecursive2(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -int MCMlevelRecursiveBisection2(CtrlType *, GraphType *, int, float *, idxtype *, float, int); -void METIS_PartGraphKway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); -void METIS_mCPartGraphKway(int *, int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, float *, int *, int *, idxtype *); -void METIS_EdgeComputeSeparator(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, idxtype *); -void METIS_NodeComputeSeparator(int *, idxtype *, idxtype *, idxtype *, idxtype *, float *, int 
*, int *, idxtype *); -void METIS_NodeND(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void METIS_NodeWND(int *, idxtype *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *); -void METIS_NodeNDP(int, idxtype *, idxtype *, int, int *, idxtype *, idxtype *, idxtype *); - - - -/***********************/ -/* TESTing subroutines */ -/***********************/ - -/* pio.c */ -void ParallelReadGraph(GraphType *, char *, MPI_Comm); -void Mc_ParallelWriteGraph(CtrlType *, GraphType *, char *, int, int); -void ReadTestGraph(GraphType *, char *, MPI_Comm); -float *ReadTestCoordinates(GraphType *, char *, int *, MPI_Comm); -void ReadMetisGraph(char *, int *, idxtype **, idxtype **); -void Mc_SerialReadGraph(GraphType *, char *, int *, MPI_Comm); -void Mc_SerialReadMetisGraph(char *, int *, int *, int *, int *, idxtype **, idxtype **, idxtype **, idxtype **, int *); - -/* adaptgraph */ -void AdaptGraph(GraphType *, int, MPI_Comm); -void AdaptGraph2(GraphType *, int, MPI_Comm); -void Mc_AdaptGraph(GraphType *, idxtype *, int, int, MPI_Comm); - - -/* ptest.c */ -void TestParMetis_GPart(char *filename, char *xyzfile, MPI_Comm comm); -int ComputeRealCut(idxtype *, idxtype *, char *, MPI_Comm); -int ComputeRealCutFromMoved(idxtype *, idxtype *, idxtype *, idxtype *, char *, MPI_Comm); -void TestMoveGraph(GraphType *, GraphType *, idxtype *, MPI_Comm); -GraphType *SetUpGraph(CtrlType *, idxtype *, idxtype *, idxtype *, idxtype *, idxtype *, int); - -/* mienio.c */ -void mienIO(MeshType *, char *, int, int, MPI_Comm); - -/* meshio.c */ -void ParallelReadMesh(MeshType *, char *, MPI_Comm); - -/* parmetis.c */ -void ChangeToFortranNumbering(idxtype *, idxtype *, idxtype *, int, int); -void METIS_NodeRefine(int nvtxs, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, - idxtype *adjwgt, idxtype *where, idxtype *hmarker, float ubfactor); +void InitTimers(ctrl_t *); +void PrintTimingInfo(ctrl_t *); +void PrintTimer(ctrl_t *, timer, char *); +/* parmetis.c */ +void 
ChangeToFortranNumbering(idx_t *, idx_t *, idx_t *, idx_t, idx_t); + + +/* msetup.c */ +mesh_t *SetUpMesh(idx_t *etype, idx_t *ncon, idx_t *elmdist, idx_t *elements, + idx_t *elmwgt, idx_t *wgtflag, MPI_Comm *comm); +mesh_t *CreateMesh(void); +void InitMesh(mesh_t *mesh); + + +/* gkmpi.c */ +int gkMPI_Comm_size(MPI_Comm comm, idx_t *size); +int gkMPI_Comm_rank(MPI_Comm comm, idx_t *rank); +int gkMPI_Get_count(MPI_Status *status, MPI_Datatype datatype, + idx_t *count); +int gkMPI_Send(void *buf, idx_t count, MPI_Datatype datatype, idx_t dest, + idx_t tag, MPI_Comm comm); +int gkMPI_Recv(void *buf, idx_t count, MPI_Datatype datatype, + idx_t source, idx_t tag, MPI_Comm comm, MPI_Status *status); +int gkMPI_Isend(void *buf, idx_t count, MPI_Datatype datatype, idx_t dest, + idx_t tag, MPI_Comm comm, MPI_Request *request); +int gkMPI_Irecv(void *buf, idx_t count, MPI_Datatype datatype, + idx_t source, idx_t tag, MPI_Comm comm, MPI_Request *request); +int gkMPI_Wait(MPI_Request *request, MPI_Status *status); +int gkMPI_Waitall(idx_t count, MPI_Request *array_of_requests, + MPI_Status *array_of_statuses); +int gkMPI_Barrier(MPI_Comm comm); +int gkMPI_Bcast(void *buffer, idx_t count, MPI_Datatype datatype, + idx_t root, MPI_Comm comm); +int gkMPI_Reduce(void *sendbuf, void *recvbuf, idx_t count, + MPI_Datatype datatype, MPI_Op op, idx_t root, MPI_Comm comm); +int gkMPI_Allreduce(void *sendbuf, void *recvbuf, idx_t count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int gkMPI_Scan(void *sendbuf, void *recvbuf, idx_t count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int gkMPI_Allgather(void *sendbuf, idx_t sendcount, + MPI_Datatype sendtype, void *recvbuf, idx_t recvcount, + MPI_Datatype recvtype, MPI_Comm comm); +int gkMPI_Alltoall(void *sendbuf, idx_t sendcount, + MPI_Datatype sendtype, void *recvbuf, idx_t recvcount, + MPI_Datatype recvtype, MPI_Comm comm); +int gkMPI_Alltoallv(void *sendbuf, idx_t *sendcounts, + idx_t *sdispls, MPI_Datatype sendtype, 
void *recvbuf, + idx_t *recvcounts, idx_t *rdispls, MPI_Datatype recvtype, + MPI_Comm comm); +int gkMPI_Allgatherv(void *sendbuf, idx_t sendcount, MPI_Datatype sendtype, + void *recvbuf, idx_t *recvcounts, idx_t *rdispls, + MPI_Datatype recvtype, MPI_Comm comm); +int gkMPI_Scatterv(void *sendbuf, idx_t *sendcounts, idx_t *sdispls, + MPI_Datatype sendtype, void *recvbuf, idx_t recvcount, + MPI_Datatype recvtype, idx_t root, MPI_Comm comm); +int gkMPI_Gatherv(void *sendbuf, idx_t sendcount, MPI_Datatype sendtype, + void *recvbuf, idx_t *recvcounts, idx_t *displs, MPI_Datatype recvtype, + idx_t root, MPI_Comm comm); +int gkMPI_Comm_split(MPI_Comm comm, idx_t color, idx_t key, + MPI_Comm *newcomm); +int gkMPI_Comm_free(MPI_Comm *comm); +int gkMPI_Finalize(); diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/pspases.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/pspases.c index 421dae2d..2a5d4dd4 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/pspases.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/pspases.c @@ -7,7 +7,7 @@ * Started 10/14/97 * George * - * $Id: pspases.c,v 1.3 2003/07/21 17:18:53 karypis Exp $ + * $Id: pspases.c 10535 2011-07-11 04:29:44Z karypis $ * */ @@ -17,65 +17,51 @@ /*********************************************************************************** * This function is the entry point of the serial ordering algorithm. 
************************************************************************************/ -void ParMETIS_SerialNodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, - int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm) +int ParMETIS_SerialNodeND(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, + idx_t *numflag, idx_t *options, idx_t *order, idx_t *sizes, + MPI_Comm *comm) { - int i, npes, mype, seroptions[10]; - CtrlType ctrl; - GraphType *agraph; - idxtype *perm=NULL, *iperm=NULL; - int *sendcount, *displs; + idx_t i, npes, mype; + ctrl_t *ctrl=NULL; + graph_t *agraph=NULL; + idx_t *perm=NULL, *iperm=NULL; + idx_t *sendcount, *displs; - MPI_Comm_size(*comm, &npes); - MPI_Comm_rank(*comm, &mype); + /* Setup the ctrl */ + ctrl = SetupCtrl(PARMETIS_OP_OMETIS, options, 1, 1, NULL, NULL, *comm); + npes = ctrl->npes; + mype = ctrl->mype; if (!ispow2(npes)) { if (mype == 0) printf("Error: The number of processors must be a power of 2!\n"); - return; + FreeCtrl(&ctrl); + return METIS_ERROR; } - if (*numflag == 1) - ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1); - - SetUpCtrl(&ctrl, npes, options[OPTION_DBGLVL], *comm); - - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); + if (*numflag > 0) + ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1); - agraph = AssembleEntireGraph(&ctrl, vtxdist, xadj, adjncy); + STARTTIMER(ctrl, ctrl->TotalTmr); + STARTTIMER(ctrl, ctrl->MoveTmr); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); + agraph = AssembleEntireGraph(ctrl, vtxdist, xadj, adjncy); + STOPTIMER(ctrl, ctrl->MoveTmr); if (mype == 0) { - perm = idxmalloc(agraph->nvtxs, "PAROMETISS: perm"); - iperm = idxmalloc(agraph->nvtxs, "PAROMETISS: 
iperm"); - - seroptions[0] = 0; - /* - seroptions[1] = 3; - seroptions[2] = 1; - seroptions[3] = 2; - seroptions[4] = 128; - seroptions[5] = 1; - seroptions[6] = 0; - seroptions[7] = 1; - */ - - METIS_NodeNDP(agraph->nvtxs, agraph->xadj, agraph->adjncy, npes, seroptions, perm, iperm, sizes); + perm = imalloc(agraph->nvtxs, "PAROMETISS: perm"); + iperm = imalloc(agraph->nvtxs, "PAROMETISS: iperm"); + + METIS_NodeNDP(agraph->nvtxs, agraph->xadj, agraph->adjncy, + agraph->vwgt, npes, NULL, perm, iperm, sizes); } - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); + STARTTIMER(ctrl, ctrl->MoveTmr); /* Broadcast the sizes array */ - MPI_Bcast((void *)sizes, 2*npes, IDX_DATATYPE, 0, ctrl.gcomm); + gkMPI_Bcast((void *)sizes, 2*npes, IDX_T, 0, ctrl->gcomm); /* Scatter the iperm */ sendcount = imalloc(npes, "PAROMETISS: sendcount"); @@ -85,23 +71,25 @@ void ParMETIS_SerialNodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int displs[i] = vtxdist[i]; } - MPI_Scatterv((void *)iperm, sendcount, displs, IDX_DATATYPE, (void *)order, vtxdist[mype+1]-vtxdist[mype], IDX_DATATYPE, 0, ctrl.gcomm); + gkMPI_Scatterv((void *)iperm, sendcount, displs, IDX_T, (void *)order, + vtxdist[mype+1]-vtxdist[mype], IDX_T, 0, ctrl->gcomm); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); + STOPTIMER(ctrl, ctrl->MoveTmr); + STOPTIMER(ctrl, ctrl->TotalTmr); + IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); + gk_free((void **)&agraph->xadj, &agraph->adjncy, &perm, &iperm, + &sendcount, &displs, &agraph, LTERM); - GKfree((void **)&agraph->xadj, &agraph->adjncy, &perm, &iperm, 
&sendcount, - &displs, &agraph, LTERM); - FreeCtrl(&ctrl); - - if (*numflag == 1) + if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0); + goto DONE; + +DONE: + FreeCtrl(&ctrl); + return METIS_OK; } @@ -109,13 +97,13 @@ void ParMETIS_SerialNodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int /************************************************************************* * This function assembles the graph into a single processor **************************************************************************/ -GraphType *AssembleEntireGraph(CtrlType *ctrl, idxtype *vtxdist, idxtype *xadj, idxtype *adjncy) +graph_t *AssembleEntireGraph(ctrl_t *ctrl, idx_t *vtxdist, idx_t *xadj, idx_t *adjncy) { - int i, gnvtxs, nvtxs, gnedges, nedges; - int npes = ctrl->npes, mype = ctrl->mype; - idxtype *axadj, *aadjncy; - int *recvcounts, *displs; - GraphType *agraph; + idx_t i, gnvtxs, nvtxs, gnedges, nedges; + idx_t npes = ctrl->npes, mype = ctrl->mype; + idx_t *axadj, *aadjncy; + idx_t *recvcounts, *displs; + graph_t *agraph; gnvtxs = vtxdist[npes]; nvtxs = vtxdist[mype+1]-vtxdist[mype]; @@ -128,7 +116,7 @@ GraphType *AssembleEntireGraph(CtrlType *ctrl, idxtype *vtxdist, idxtype *xadj, for (i=0; icomm); + gkMPI_Gatherv((void *)xadj, nvtxs, IDX_T, axadj, recvcounts, displs, + IDX_T, 0, ctrl->comm); MAKECSR(i, nvtxs, xadj); MAKECSR(i, gnvtxs, axadj); /* Gather all the adjncy arrays next */ /* Determine the # of edges stored at each processor */ - MPI_Allgather((void *)(&nedges), 1, MPI_INT, (void *)recvcounts, 1, MPI_INT, ctrl->comm); + gkMPI_Allgather((void *)(&nedges), 1, IDX_T, (void *)recvcounts, 1, IDX_T, ctrl->comm); displs[0] = 0; for (i=1; icomm); + gkMPI_Gatherv((void *)adjncy, nedges, IDX_T, aadjncy, recvcounts, displs, IDX_T, 0, ctrl->comm); - /* myprintf(ctrl, "Gnvtxs: %d, Gnedges: %d\n", gnvtxs, gnedges); */ + /* myprintf(ctrl, "Gnvtxs: %"PRIDX", Gnedges: %"PRIDX"\n", gnvtxs, gnedges); */ agraph = CreateGraph(); agraph->nvtxs = gnvtxs; diff 
--git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/redomylink.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/redomylink.c index 8635bc44..ccbdb994 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/redomylink.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/redomylink.c @@ -8,7 +8,7 @@ * Started 7/23/97 * George * - * $Id: redomylink.c,v 1.2 2003/07/21 17:18:53 karypis Exp $ + * $Id: redomylink.c 10542 2011-07-11 16:56:22Z karypis $ */ #include @@ -17,58 +17,63 @@ /************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ -void RedoMyLink(CtrlType *ctrl, GraphType *graph, idxtype *home, int me, - int you, float *flows, float *sr_cost, float *sr_lbavg) +void RedoMyLink(ctrl_t *ctrl, graph_t *graph, idx_t *home, idx_t me, + idx_t you, real_t *flows, real_t *sr_cost, real_t *sr_lbavg) { - int h, i, r; - int nvtxs, nedges, ncon; - int pass, lastseed, totalv; - idxtype *xadj, *adjncy, *adjwgt, *where, *vsize; - idxtype *costwhere, *lbwhere, *selectwhere; - idxtype *rdata, *ed, *id, *bndptr, *bndind, *perm; - float *nvwgt, mycost; - float lbavg, lbvec[MAXNCON]; - float best_lbavg, other_lbavg = -1.0, bestcost, othercost = -1.0; - float npwgts[2*MAXNCON], pwgts[MAXNCON*2], tpwgts[MAXNCON*2]; - float ipc_factor, redist_factor, ftmp; -int mype; -MPI_Comm_rank(MPI_COMM_WORLD, &mype); - - nvtxs = graph->nvtxs; + idx_t h, i, r; + idx_t nvtxs, nedges, ncon; + idx_t pass, lastseed, totalv; + idx_t *xadj, *adjncy, *adjwgt, *where, *vsize; + idx_t *costwhere, *lbwhere, *selectwhere; + idx_t *ed, *id, *bndptr, *bndind, *perm; + real_t *nvwgt, mycost; + real_t lbavg, *lbvec; + real_t best_lbavg, other_lbavg = -1.0, bestcost, othercost = -1.0; + real_t *npwgts, *pwgts, *tpwgts; + real_t ipc_factor, redist_factor, ftmp; + idx_t 
mype; + gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); + + + WCOREPUSH; + + nvtxs = graph->nvtxs; nedges = graph->nedges; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - vsize = graph->vsize; + ncon = graph->ncon; + xadj = graph->xadj; + nvwgt = graph->nvwgt; + vsize = graph->vsize; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - ipc_factor = ctrl->ipc_factor; + where = graph->where; + + ipc_factor = ctrl->ipc_factor; redist_factor = ctrl->redist_factor; - /**************************/ /* set up data structures */ - /**************************/ - rdata = idxmalloc(7*nvtxs, "rdata"); - id = graph->sendind = rdata; - ed = graph->recvind = rdata + nvtxs; - bndptr = graph->sendptr = rdata + 2*nvtxs; - bndind = graph->recvptr = rdata + 3*nvtxs; - costwhere = rdata + 4*nvtxs; - lbwhere = rdata + 5*nvtxs; - perm = rdata + 6*nvtxs; + id = graph->sendind = iwspacemalloc(ctrl, nvtxs); + ed = graph->recvind = iwspacemalloc(ctrl, nvtxs); + bndptr = graph->sendptr = iwspacemalloc(ctrl, nvtxs); + bndind = graph->recvptr = iwspacemalloc(ctrl, nvtxs); + + costwhere = iwspacemalloc(ctrl, nvtxs); + lbwhere = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + + lbvec = rwspacemalloc(ctrl, ncon); + pwgts = rset(2*ncon, 0.0, rwspacemalloc(ctrl, 2*ncon)); + npwgts = rwspacemalloc(ctrl, 2*ncon); + tpwgts = rwspacemalloc(ctrl, 2*ncon); + graph->gnpwgts = npwgts; RandomPermute(nvtxs, perm, 1); - idxcopy(nvtxs, where, costwhere); - idxcopy(nvtxs, where, lbwhere); + icopy(nvtxs, where, costwhere); + icopy(nvtxs, where, lbwhere); - /*****************************/ - /* compute target pwgts */ - /*****************************/ - sset(ncon*2, 0.0, pwgts); + /* compute target pwgts */ for (h=0; h0; pass--) { - idxset(nvtxs, 1, where); + for (pass=N_MOC_REDO_PASSES; pass>0; pass--) { + iset(nvtxs, 1, where); - /***************************/ - /* find seed vertices */ - /***************************/ + /* find seed vertices */ r = perm[lastseed] 
% nvtxs; lastseed = (lastseed+1) % nvtxs; where[r] = 0; - Mc_Serial_Compute2WayPartitionParams(graph); - Mc_Serial_Init2WayBalance(graph, tpwgts); - Mc_Serial_FM_2WayRefine(graph, tpwgts, 4); - Mc_Serial_Balance2Way(graph, tpwgts, 1.02); - Mc_Serial_FM_2WayRefine(graph, tpwgts, 4); + Mc_Serial_Compute2WayPartitionParams(ctrl, graph); + Mc_Serial_Init2WayBalance(ctrl, graph, tpwgts); + Mc_Serial_FM_2WayRefine(ctrl, graph, tpwgts, 4); + Mc_Serial_Balance2Way(ctrl, graph, tpwgts, 1.02); + Mc_Serial_FM_2WayRefine(ctrl, graph, tpwgts, 4); for (i=0; imincut)*ipc_factor + (float)totalv*redist_factor; + mycost = (real_t)(graph->mincut)*ipc_factor + (real_t)totalv*redist_factor; if (bestcost >= mycost) { bestcost = mycost; other_lbavg = lbavg; - idxcopy(nvtxs, where, costwhere); + icopy(nvtxs, where, costwhere); } if (best_lbavg >= lbavg) { best_lbavg = lbavg; othercost = mycost; - idxcopy(nvtxs, where, lbwhere); + icopy(nvtxs, where, lbwhere); } } @@ -167,9 +170,8 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); *sr_lbavg = best_lbavg; } - idxcopy(nvtxs, selectwhere, where); + icopy(nvtxs, selectwhere, where); - GKfree((void **)&rdata, LTERM); - return; + WCOREPOP; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/remap.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/remap.c index 2c33b668..800ca1b5 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/remap.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/remap.c @@ -7,7 +7,7 @@ * Started 4/16/98 * George * - * $Id: remap.c,v 1.2 2003/07/21 17:18:53 karypis Exp $ + * $Id: remap.c 10361 2011-06-21 19:16:22Z karypis $ * */ @@ -17,12 +17,12 @@ * This function remaps that graph so that it will minimize the * redistribution cost **************************************************************************/ -void ParallelReMapGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) +void ParallelReMapGraph(ctrl_t 
*ctrl, graph_t *graph) { - int i, nvtxs, nparts; - idxtype *where, *vsize, *map, *lpwgts; + idx_t i, nvtxs, nparts; + idx_t *where, *vsize, *map, *lpwgts; - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RemapTmr)); if (ctrl->npes != ctrl->nparts) { @@ -30,23 +30,27 @@ void ParallelReMapGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) return; } - nvtxs = graph->nvtxs; - where = graph->where; - vsize = graph->vsize; + WCOREPUSH; + + nvtxs = graph->nvtxs; + where = graph->where; + vsize = graph->vsize; nparts = ctrl->nparts; - map = wspace->pv1; - lpwgts = idxset(nparts, 0, wspace->pv2); + map = iwspacemalloc(ctrl, nparts); + lpwgts = iset(nparts, 0, iwspacemalloc(ctrl, nparts)); for (i=0; incon); + ParallelTotalVReMap(ctrl, lpwgts, map, NREMAP_PASSES, graph->ncon); for (i=0; idbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); + WCOREPOP; + + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->RemapTmr)); } @@ -55,27 +59,28 @@ void ParallelReMapGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) * This function computes the assignment using the the objective the * minimization of the total volume of data that needs to move **************************************************************************/ -void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map, - WorkSpaceType *wspace, int npasses, int ncon) +void ParallelTotalVReMap(ctrl_t *ctrl, idx_t *lpwgts, idx_t *map, idx_t npasses, idx_t ncon) { - int i, ii, j, k, nparts, mype; - int pass, maxipwgt, nmapped, oldwgt, newwgt, done; - idxtype *rowmap, *mylpwgts; - KeyValueType *recv, send; - int nsaved, gnsaved; + idx_t i, ii, j, k, nparts, mype; + idx_t pass, maxipwgt, nmapped, oldwgt, newwgt, done; + idx_t *rowmap, *mylpwgts; + ikv_t *recv, send; + idx_t nsaved, gnsaved; + + WCOREPUSH; mype = ctrl->mype; nparts = 
ctrl->nparts; - recv = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nparts, "remap: recv"); - mylpwgts = idxmalloc(nparts, "mylpwgts"); + rowmap = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + mylpwgts = icopy(nparts, lpwgts, iwspacemalloc(ctrl, nparts)); + recv = ikvwspacemalloc(ctrl, nparts); + + iset(nparts, -1, map); done = nmapped = 0; - idxset(nparts, -1, map); - rowmap = idxset(nparts, -1, wspace->pv3); - idxcopy(nparts, lpwgts, mylpwgts); for (pass=0; pass 0 && !done) { send.key = -mylpwgts[maxipwgt]; @@ -87,9 +92,9 @@ void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map, } /* each processor sends its selection */ - MPI_Allgather((void *)&send, 2, IDX_DATATYPE, (void *)recv, 2, IDX_DATATYPE, ctrl->comm); + gkMPI_Allgather((void *)&send, 2, IDX_T, (void *)recv, 2, IDX_T, ctrl->comm); - ikeysort(nparts, recv); + ikvsorti(nparts, recv); if (recv[0].key == 0) break; @@ -100,8 +105,8 @@ void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map, if (i == -1) continue; - j = i % nparts; - k = i / nparts; + j = i%nparts; + k = i/nparts; if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, j, k)) { map[j] = k; rowmap[k] = j; @@ -154,11 +159,11 @@ void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map, for (i=0; idbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %d\n", amax(0,gnsaved))); + IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, + "Savings from parallel remapping: %"PRIDX"\n",gk_max(0,gnsaved))); } - GKfree((void **)&recv, (void **)&mylpwgts, LTERM); - + WCOREPOP; } @@ -166,9 +171,9 @@ void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map, * This function computes the assignment using the the objective the * minimization of the total volume of data that needs to move **************************************************************************/ -int SimilarTpwgts(float *tpwgts, int ncon, int s1, int s2) +idx_t SimilarTpwgts(real_t *tpwgts, idx_t ncon, idx_t s1, 
idx_t s2) { - int i; + idx_t i; for (i=0; i SMALLFLOAT) diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/rename.h b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/rename.h index a74425ff..78e99dd2 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/rename.h +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/rename.h @@ -1,293 +1,182 @@ -/* kmetis.c */ -#define Mc_Global_Partition Mc_Global_Partition__ - -/* mmetis.c */ - -/* gkmetis.c */ - -/* match.c */ -#define Match_Global Match_Global__ -#define Match_Local Match_Local__ -#define CreateCoarseGraph_Global CreateCoarseGraph_Global__ -#define CreateCoarseGraph_Local CreateCoarseGraph_Local__ - - -/* initpart.c */ -#define Mc_InitPartition_RB Mc_InitPartition_RB__ -#define Mc_KeepPart Mc_KeepPart__ - -/* kwayrefine.c */ -#define Mc_ProjectPartition Mc_ProjectPartition__ -#define Mc_ComputePartitionParams Mc_ComputePartitionParams__ - -/* kwayfm.c */ -#define Mc_KWayFM Mc_KWayFM__ - -/* kwaybalance.c */ -#define Mc_KWayBalance Mc_KWayBalance__ - -/* remap.c */ -#define ParallelReMapGraph ParallelReMapGraph__ -#define ParallelTotalVReMap ParallelTotalVReMap__ -#define SimilarTpwgts SimilarTpwgts__ - -/* move.c */ -#define Mc_MoveGraph Mc_MoveGraph__ -#define CheckMGraph CheckMGraph__ -#define ProjectInfoBack ProjectInfoBack__ -#define FindVtxPerm FindVtxPerm__ - -/* memory.c */ -#define PreAllocateMemory PreAllocateMemory__ -#define FreeWSpace FreeWSpace__ -#define FreeCtrl FreeCtrl__ -#define CreateGraph CreateGraph__ -#define InitGraph InitGraph__ -#define FreeGraph FreeGraph__ -#define FreeNonGraphFields FreeNonGraphFields__ -#define FreeInitialGraphAndRemap FreeInitialGraphAndRemap__ - - -/************************/ -/* Adaptive subroutines */ -/************************/ -/* ametis.c */ -#define Adaptive_Partition Adaptive_Partition__ - -/* rmetis.c */ - -/* wave.c */ -#define WavefrontDiffusion 
WavefrontDiffusion__ - -/* balancemylink.c */ -#define BalanceMyLink BalanceMyLink__ - -/* redomylink.c */ -#define RedoMyLink RedoMyLink__ - -/* initbalance.c */ -#define Balance_Partition Balance_Partition__ -#define Mc_AssembleAdaptiveGraph Mc_AssembleAdaptiveGraph__ - -/* mdiffusion.c */ -#define Mc_Diffusion Mc_Diffusion__ -#define ExtractGraph ExtractGraph__ - -/* diffutil.c */ -#define SetUpConnectGraph SetUpConnectGraph__ -#define Mc_ComputeMoveStatistics Mc_ComputeMoveStatistics__ -#define Mc_ComputeSerialTotalV Mc_ComputeSerialTotalV__ -#define ComputeLoad ComputeLoad__ -#define ConjGrad2 ConjGrad2__ -#define mvMult2 mvMult2__ -#define ComputeTransferVector ComputeTransferVector__ -#define ComputeSerialEdgeCut ComputeSerialEdgeCut__ -#define ComputeSerialTotalV ComputeSerialTotalV__ - -/* akwayfm.c */ -#define Mc_KWayAdaptiveRefine Mc_KWayAdaptiveRefine__ - -/* selectq.c */ -#define Mc_DynamicSelectQueue Mc_DynamicSelectQueue__ -#define Mc_HashVwgts Mc_HashVwgts__ -#define Mc_HashVRank Mc_HashVRank__ - -/* csrmatch.c */ -#define CSR_Match_SHEM CSR_Match_SHEM__ - -/* serial.c */ -#define Mc_SerialKWayAdaptRefine Mc_SerialKWayAdaptRefine__ -#define Mc_ComputeSerialPartitionParams Mc_ComputeSerialPartitionParams__ -#define AreAllHVwgtsBelow AreAllHVwgtsBelow__ -#define ComputeHKWayLoadImbalance ComputeHKWayLoadImbalance__ -#define SerialRemap SerialRemap__ -#define SSMIncKeyCmp SSMIncKeyCmp__ -#define Mc_Serial_FM_2WayRefine Mc_Serial_FM_2WayRefine__ -#define Serial_SelectQueue Serial_SelectQueue__ -#define Serial_BetterBalance Serial_BetterBalance__ -#define Serial_Compute2WayHLoadImbalance Serial_Compute2WayHLoadImbalance__ -#define Mc_Serial_Balance2Way Mc_Serial_Balance2Way__ -#define Mc_Serial_Init2WayBalance Mc_Serial_Init2WayBalance__ -#define Serial_SelectQueueOneWay Serial_SelectQueueOneWay__ -#define Mc_Serial_Compute2WayPartitionParams Mc_Serial_Compute2WayPartitionParams__ -#define Serial_AreAnyVwgtsBelow Serial_AreAnyVwgtsBelow__ - -/* weird.c 
*/ -#define PartitionSmallGraph PartitionSmallGraph__ -#define CheckInputs CheckInputs__ - - -/****************************/ -/* Mesh to Dual subroutines */ -/****************************/ -/* mesh.c */ -/* msetup.c */ -#define SetUpMesh SetUpMesh__ -#define CreateMesh CreateMesh__ -#define InitMesh InitMesh__ - - -/************************/ -/* Ordering subroutines */ -/************************/ -/* ometis.c */ -#define MultilevelOrder MultilevelOrder__ -#define Order_Partition_Multiple Order_Partition_Multiple__ -#define Order_Partition Order_Partition__ -#define LabelSeparators LabelSeparators__ -#define CompactGraph CompactGraph__ -#define LocalNDOrder LocalNDOrder__ - -/* pspases.c */ -#define AssembleEntireGraph AssembleEntireGraph__ - - -/* node_refine.c */ -#define AllocateNodePartitionParams AllocateNodePartitionParams__ -#define ComputeNodePartitionParams ComputeNodePartitionParams__ -#define UpdateNodePartitionParams UpdateNodePartitionParams__ -#define KWayNodeRefine_Greedy KWayNodeRefine_Greedy__ -#define KWayNodeRefine_Greedy2 KWayNodeRefine_Greedy2__ -#define KWayNodeRefine2Phase KWayNodeRefine2Phase__ -#define KWayNodeRefine2Phase2 KWayNodeRefine2Phase2__ -#define KWayNodeRefineInterior KWayNodeRefineInterior__ -#define PrintNodeBalanceInfo PrintNodeBalanceInfo__ - - -/* initmsection.c */ -#define InitMultisection InitMultisection__ -#define AssembleMultisectedGraph AssembleMultisectedGraph__ - -/* xyzpart.c */ -#define Coordinate_Partition Coordinate_Partition__ -#define PartSort PartSort__ - -/***********************/ -/* Utility subroutines */ -/***********************/ -/* fpqueue.c */ -#define FPQueueInit FPQueueInit__ -#define FPQueueReset FPQueueReset__ -#define FPQueueFree FPQueueFree__ -#define FPQueueGetSize FPQueueGetSize__ -#define FPQueueInsert FPQueueInsert__ -#define FPQueueDelete FPQueueDelete__ -#define FPQueueUpdate FPQueueUpdate__ -#define FPQueueUpdateUp FPQueueUpdateUp__ -#define FPQueueGetMax FPQueueGetMax__ -#define 
FPQueueSeeMaxVtx FPQueueSeeMaxVtx__ -#define FPQueueSeeMaxGain FPQueueSeeMaxGain__ -#define FPQueueGetKey FPQueueGetKey__ -#define FPQueueGetQSize FPQueueGetQSize__ -#define CheckHeapFloat CheckHeapFloat__ - -/* stat.c */ -#define Mc_ComputeSerialBalance Mc_ComputeSerialBalance__ -#define Mc_ComputeParallelBalance Mc_ComputeParallelBalance__ -#define Mc_PrintThrottleMatrix Mc_PrintThrottleMatrix__ -#define Mc_ComputeRefineStats Mc_ComputeRefineStats__ - -/* debug.c */ -#define PrintVector PrintVector__ -#define PrintVector2 PrintVector2__ -#define PrintPairs PrintPairs__ -#define PrintGraph PrintGraph__ -#define PrintGraph2 PrintGraph2__ -#define PrintSetUpInfo PrintSetUpInfo__ -#define PrintTransferedGraphs PrintTransferedGraphs__ -#define WriteMetisGraph WriteMetisGraph__ - -/* comm.c */ -#define CommInterfaceData CommInterfaceData__ -#define CommChangedInterfaceData CommChangedInterfaceData__ -#define GlobalSEMax GlobalSEMax__ -#define GlobalSEMaxDouble GlobalSEMaxDouble__ -#define GlobalSEMin GlobalSEMin__ -#define GlobalSESum GlobalSESum__ -#define GlobalSEMaxFloat GlobalSEMaxFloat__ -#define GlobalSEMinFloat GlobalSEMinFloat__ -#define GlobalSESumFloat GlobalSESumFloat__ - -/* util.c */ -#define errexit errexit__ -#define myprintf myprintf__ -#define rprintf rprintf__ -#define imalloc imalloc__ -#define idxmalloc idxmalloc__ -#define fmalloc fmalloc__ -#define ismalloc ismalloc__ -#define idxsmalloc idxsmalloc__ -#define GKmalloc GKmalloc__ -#define GKfree GKfree__ -#define iset iset__ -#define idxset idxset__ -#define idxamax idxamax__ -#define idxamin idxamin__ -#define idxasum idxasum__ -#define snorm2 snorm2__ -#define sdot sdot__ -#define saxpy saxpy__ -#define ikeyvalsort_org ikeyvalsort_org__ -#define IncKeyValueCmp IncKeyValueCmp__ -#define dkeyvalsort dkeyvalsort__ -#define DecKeyValueCmp DecKeyValueCmp__ -#define BSearch BSearch__ -#define RandomPermute RandomPermute__ -#define FastRandomPermute FastRandomPermute__ -#define ispow2 ispow2__ -#define 
log2Int log2Int__ -#define BucketSortKeysDec BucketSortKeysDec__ -#define sset sset__ -#define iamax iamax__ -#define idxamax_strd idxamax_strd__ -#define idxamin_strd idxamin_strd__ -#define samax_strd samax_strd__ -#define sfamax sfamax__ -#define samin_strd samin_strd__ -#define idxavg idxavg__ -#define savg savg__ -#define samax samax__ -#define sfavg sfavg__ -#define samax2 samax2__ -#define samin samin__ -#define idxsum idxsum__ -#define idxsum_strd idxsum_strd__ -#define idxadd idxadd__ -#define ssum ssum__ -#define ssum_strd ssum_strd__ -#define sscale sscale__ -#define saneg saneg__ -#define BetterVBalance BetterVBalance__ -#define IsHBalanceBetterTT IsHBalanceBetterTT__ -#define IsHBalanceBetterFT IsHBalanceBetterFT__ -#define myvalkeycompare myvalkeycompare__ -#define imyvalkeycompare imyvalkeycompare__ -#define fsmalloc fsmalloc__ -#define saxpy2 saxpy2__ -#define GetThreeMax GetThreeMax__ - -/* qsort_special.c */ -#define iidxsort iidxsort__ -#define iintsort iintsort__ -#define ikeysort ikeysort__ -#define ikeyvalsort ikeyvalsort__ - -/* grsetup.c */ -#define Mc_SetUpGraph Mc_SetUpGraph__ -#define SetUpCtrl SetUpCtrl__ -#define SetUpComm SetUpComm__ -#define ChangeNumbering ChangeNumbering__ -#define ChangeNumberingMesh ChangeNumberingMesh__ -#define GraphRandomPermute GraphRandomPermute__ -#define ComputeMoveStatistics ComputeMoveStatistics__ - -/* timer.c */ -#define InitTimers InitTimers__ -#define PrintTimingInfo PrintTimingInfo__ -#define PrintTimer PrintTimer__ - -/* setup.c */ -#define SetUp SetUp__ -#define Home_PE Home_PE__ - - +#ifndef _LIBPARMETIS_RENAME_H_ +#define _LIBPARMETIS_RENAME_H_ + +#define KWayAdaptiveRefine libparmetis__KWayAdaptiveRefine +#define Adaptive_Partition libparmetis__Adaptive_Partition +#define BalanceMyLink libparmetis__BalanceMyLink +#define CommChangedInterfaceData libparmetis__CommChangedInterfaceData +#define CommInterfaceData libparmetis__CommInterfaceData +#define CommSetup libparmetis__CommSetup +#define 
CommUpdateNnbrs libparmetis__CommUpdateNnbrs +#define GlobalSEMax libparmetis__GlobalSEMax +#define GlobalSEMaxComm libparmetis__GlobalSEMaxComm +#define GlobalSEMaxFloat libparmetis__GlobalSEMaxFloat +#define GlobalSEMin libparmetis__GlobalSEMin +#define GlobalSEMinComm libparmetis__GlobalSEMinComm +#define GlobalSEMinFloat libparmetis__GlobalSEMinFloat +#define GlobalSESum libparmetis__GlobalSESum +#define GlobalSESumComm libparmetis__GlobalSESumComm +#define GlobalSESumFloat libparmetis__GlobalSESumFloat +#define CSR_Match_SHEM libparmetis__CSR_Match_SHEM +#define FreeCtrl libparmetis__FreeCtrl +#define SetupCtrl libparmetis__SetupCtrl +#define SetupCtrl_invtvwgts libparmetis__SetupCtrl_invtvwgts +#define PrintGraph libparmetis__PrintGraph +#define PrintGraph2 libparmetis__PrintGraph2 +#define PrintPairs libparmetis__PrintPairs +#define PrintSetUpInfo libparmetis__PrintSetUpInfo +#define PrintTransferedGraphs libparmetis__PrintTransferedGraphs +#define PrintVector libparmetis__PrintVector +#define PrintVector2 libparmetis__PrintVector2 +#define WriteMetisGraph libparmetis__WriteMetisGraph +#define ComputeLoad libparmetis__ComputeLoad +#define ComputeTransferVector libparmetis__ComputeTransferVector +#define ConjGrad2 libparmetis__ConjGrad2 +#define Mc_ComputeMoveStatistics libparmetis__Mc_ComputeMoveStatistics +#define Mc_ComputeSerialTotalV libparmetis__Mc_ComputeSerialTotalV +#define SetUpConnectGraph libparmetis__SetUpConnectGraph +#define mvMult2 libparmetis__mvMult2 +#define gkMPI_Allgather libparmetis__gkMPI_Allgather +#define gkMPI_Allgatherv libparmetis__gkMPI_Allgatherv +#define gkMPI_Allreduce libparmetis__gkMPI_Allreduce +#define gkMPI_Alltoall libparmetis__gkMPI_Alltoall +#define gkMPI_Alltoallv libparmetis__gkMPI_Alltoallv +#define gkMPI_Barrier libparmetis__gkMPI_Barrier +#define gkMPI_Bcast libparmetis__gkMPI_Bcast +#define gkMPI_Comm_free libparmetis__gkMPI_Comm_free +#define gkMPI_Comm_rank libparmetis__gkMPI_Comm_rank +#define gkMPI_Comm_size 
libparmetis__gkMPI_Comm_size +#define gkMPI_Comm_split libparmetis__gkMPI_Comm_split +#define gkMPI_Finalize libparmetis__gkMPI_Finalize +#define gkMPI_Gatherv libparmetis__gkMPI_Gatherv +#define gkMPI_Get_count libparmetis__gkMPI_Get_count +#define gkMPI_Irecv libparmetis__gkMPI_Irecv +#define gkMPI_Isend libparmetis__gkMPI_Isend +#define gkMPI_Recv libparmetis__gkMPI_Recv +#define gkMPI_Reduce libparmetis__gkMPI_Reduce +#define gkMPI_Scan libparmetis__gkMPI_Scan +#define gkMPI_Scatterv libparmetis__gkMPI_Scatterv +#define gkMPI_Send libparmetis__gkMPI_Send +#define gkMPI_Wait libparmetis__gkMPI_Wait +#define gkMPI_Waitall libparmetis__gkMPI_Waitall +#define CreateGraph libparmetis__CreateGraph +#define FreeGraph libparmetis__FreeGraph +#define FreeInitialGraphAndRemap libparmetis__FreeInitialGraphAndRemap +#define FreeNonGraphFields libparmetis__FreeNonGraphFields +#define FreeNonGraphNonSetupFields libparmetis__FreeNonGraphNonSetupFields +#define InitGraph libparmetis__InitGraph +#define SetupGraph libparmetis__SetupGraph +#define SetupGraph_nvwgts libparmetis__SetupGraph_nvwgts +#define AssembleAdaptiveGraph libparmetis__AssembleAdaptiveGraph +#define Balance_Partition libparmetis__Balance_Partition +#define AssembleMultisectedGraph libparmetis__AssembleMultisectedGraph +#define InitMultisection libparmetis__InitMultisection +#define InitPartition libparmetis__InitPartition +#define KeepPart libparmetis__KeepPart +#define Global_Partition libparmetis__Global_Partition +#define ComputePartitionParams libparmetis__ComputePartitionParams +#define KWayBalance libparmetis__KWayBalance +#define KWayFM libparmetis__KWayFM +#define ProjectPartition libparmetis__ProjectPartition +#define CreateCoarseGraph_Global libparmetis__CreateCoarseGraph_Global +#define CreateCoarseGraph_Local libparmetis__CreateCoarseGraph_Local +#define Match_Global libparmetis__Match_Global +#define Match_Local libparmetis__Match_Local +#define ExtractGraph libparmetis__ExtractGraph +#define 
Mc_Diffusion libparmetis__Mc_Diffusion +#define CheckMGraph libparmetis__CheckMGraph +#define FindVtxPerm libparmetis__FindVtxPerm +#define MoveGraph libparmetis__MoveGraph +#define ProjectInfoBack libparmetis__ProjectInfoBack +#define CreateMesh libparmetis__CreateMesh +#define InitMesh libparmetis__InitMesh +#define SetUpMesh libparmetis__SetUpMesh +#define AllocateNodePartitionParams libparmetis__AllocateNodePartitionParams +#define ComputeNodePartitionParams libparmetis__ComputeNodePartitionParams +#define KWayNodeRefine2Phase libparmetis__KWayNodeRefine2Phase +#define KWayNodeRefineInterior libparmetis__KWayNodeRefineInterior +#define KWayNodeRefine_Greedy libparmetis__KWayNodeRefine_Greedy +#define PrintNodeBalanceInfo libparmetis__PrintNodeBalanceInfo +#define UpdateNodePartitionParams libparmetis__UpdateNodePartitionParams +#define CompactGraph libparmetis__CompactGraph +#define LabelSeparators libparmetis__LabelSeparators +#define LocalNDOrder libparmetis__LocalNDOrder +#define MultilevelOrder libparmetis__MultilevelOrder +#define Order_Partition libparmetis__Order_Partition +#define Order_Partition_Multiple libparmetis__Order_Partition_Multiple +#define AssembleEntireGraph libparmetis__AssembleEntireGraph +#define RedoMyLink libparmetis__RedoMyLink +#define ParallelReMapGraph libparmetis__ParallelReMapGraph +#define ParallelTotalVReMap libparmetis__ParallelTotalVReMap +#define SimilarTpwgts libparmetis__SimilarTpwgts +#define ChangeNumbering libparmetis__ChangeNumbering +#define ChangeNumberingMesh libparmetis__ChangeNumberingMesh +#define Mc_DynamicSelectQueue libparmetis__Mc_DynamicSelectQueue +#define Mc_HashVRank libparmetis__Mc_HashVRank +#define Mc_HashVwgts libparmetis__Mc_HashVwgts +#define AreAllHVwgtsBelow libparmetis__AreAllHVwgtsBelow +#define ComputeHKWayLoadImbalance libparmetis__ComputeHKWayLoadImbalance +#define ComputeSerialEdgeCut libparmetis__ComputeSerialEdgeCut +#define ComputeSerialTotalV libparmetis__ComputeSerialTotalV +#define 
Mc_ComputeSerialPartitionParams libparmetis__Mc_ComputeSerialPartitionParams +#define Mc_SerialKWayAdaptRefine libparmetis__Mc_SerialKWayAdaptRefine +#define Mc_Serial_Balance2Way libparmetis__Mc_Serial_Balance2Way +#define Mc_Serial_Compute2WayPartitionParams libparmetis__Mc_Serial_Compute2WayPartitionParams +#define Mc_Serial_FM_2WayRefine libparmetis__Mc_Serial_FM_2WayRefine +#define Mc_Serial_Init2WayBalance libparmetis__Mc_Serial_Init2WayBalance +#define SSMIncKeyCmp libparmetis__SSMIncKeyCmp +#define SerialRemap libparmetis__SerialRemap +#define Serial_AreAnyVwgtsBelow libparmetis__Serial_AreAnyVwgtsBelow +#define Serial_BetterBalance libparmetis__Serial_BetterBalance +#define Serial_Compute2WayHLoadImbalance libparmetis__Serial_Compute2WayHLoadImbalance +#define Serial_SelectQueue libparmetis__Serial_SelectQueue +#define Serial_SelectQueueOneWay libparmetis__Serial_SelectQueueOneWay +#define ComputeMoveStatistics libparmetis__ComputeMoveStatistics +#define ComputeParallelBalance libparmetis__ComputeParallelBalance +#define ComputeSerialBalance libparmetis__ComputeSerialBalance +#define Mc_PrintThrottleMatrix libparmetis__Mc_PrintThrottleMatrix +#define PrintPostPartInfo libparmetis__PrintPostPartInfo +#define InitTimers libparmetis__InitTimers +#define PrintTimer libparmetis__PrintTimer +#define PrintTimingInfo libparmetis__PrintTimingInfo +#define BSearch libparmetis__BSearch +#define BetterVBalance libparmetis__BetterVBalance +#define FastRandomPermute libparmetis__FastRandomPermute +#define GetThreeMax libparmetis__GetThreeMax +#define IsHBalanceBetterFT libparmetis__IsHBalanceBetterFT +#define IsHBalanceBetterTT libparmetis__IsHBalanceBetterTT +#define RandomPermute libparmetis__RandomPermute +#define ispow2 libparmetis__ispow2 +#define log2Int libparmetis__log2Int +#define myprintf libparmetis__myprintf +#define rargmax2 libparmetis__rargmax2 +#define rargmax_strd libparmetis__rargmax_strd +#define rargmin_strd libparmetis__rargmin_strd +#define ravg 
libparmetis__ravg +#define rfavg libparmetis__rfavg +#define rprintf libparmetis__rprintf +#define WavefrontDiffusion libparmetis__WavefrontDiffusion +#define CheckInputsAdaptiveRepart libparmetis__CheckInputsAdaptiveRepart +#define CheckInputsNodeND libparmetis__CheckInputsNodeND +#define CheckInputsPartGeom libparmetis__CheckInputsPartGeom +#define CheckInputsPartGeomKway libparmetis__CheckInputsPartGeomKway +#define CheckInputsPartKway libparmetis__CheckInputsPartKway +#define CheckInputsPartMeshKway libparmetis__CheckInputsPartMeshKway +#define PartitionSmallGraph libparmetis__PartitionSmallGraph +#define AllocateRefinementWorkSpace libparmetis__AllocateRefinementWorkSpace +#define AllocateWSpace libparmetis__AllocateWSpace +#define FreeWSpace libparmetis__FreeWSpace +#define cnbrpoolGetNext libparmetis__cnbrpoolGetNext +#define cnbrpoolReset libparmetis__cnbrpoolReset +#define ikvwspacemalloc libparmetis__ikvwspacemalloc +#define iwspacemalloc libparmetis__iwspacemalloc +#define rkvwspacemalloc libparmetis__rkvwspacemalloc +#define rwspacemalloc libparmetis__rwspacemalloc +#define wspacemalloc libparmetis__wspacemalloc +#define Coordinate_Partition libparmetis__Coordinate_Partition +#define IRBinCoordinates libparmetis__IRBinCoordinates +#define PseudoSampleSort libparmetis__PseudoSampleSort +#define RBBinCoordinates libparmetis__RBBinCoordinates +#define SampleSort libparmetis__SampleSort + +#endif diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/renumber.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/renumber.c new file mode 100644 index 00000000..0cdb4f49 --- /dev/null +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/renumber.c @@ -0,0 +1,94 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * mgrsetup.c + * + * This file contain various graph setting up routines + * + * Started 10/19/96 + * George + * + * $Id: renumber.c 10531 2011-07-09 21:58:13Z 
karypis $ + * + */ + +#include + + + + +/************************************************************************* +* This function changes the numbering from 1 to 0 or 0 to 1 +**************************************************************************/ +void ChangeNumbering(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *part, idx_t npes, idx_t mype, idx_t from) +{ + idx_t i, nvtxs; + + nvtxs = vtxdist[mype+1]-vtxdist[mype]; + + if (from == 1) { /* Change it from 1 to 0 */ + for (i=0; inpes; + mype = ctrl->mype; - /********************************/ - /* Try and take care bad inputs */ - /********************************/ - if (options != NULL && options[0] == 1) - dbglvl = options[PMV3_OPTION_DBGLVL]; - CheckInputs(REFINE_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag, - ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, - NULL, NULL, options, ioptions, part, comm); - - /* ADD: take care of disconnected graph */ - /* ADD: take care of highly unbalanced vtxdist */ - /*********************************/ - /* Take care the nparts = 1 case */ - /*********************************/ - if (inparts <= 1) { - idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); + + /* Take care the nparts == 1 case */ + if (*nparts == 1) { + iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part); *edgecut = 0; - return; + goto DONE; } - /**************************/ - /* Set up data structures */ - /**************************/ - if (inumflag == 1) + + /* setup the graph */ + if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); - /*****************************/ - /* Set up control structures */ - /*****************************/ - if (ioptions[0] == 1) { - dbglvl = ioptions[PMV3_OPTION_DBGLVL]; - seed = ioptions[PMV3_OPTION_SEED]; - ps_relation = (npes == inparts) ? ioptions[PMV3_OPTION_PSR] : PARMETIS_PSR_UNCOUPLED; - } - else { - dbglvl = GLOBAL_DBGLVL; - seed = GLOBAL_SEED; - ps_relation = (npes == inparts) ? 
PARMETIS_PSR_COUPLED : PARMETIS_PSR_UNCOUPLED; - } + graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag); - SetUpCtrl(&ctrl, inparts, dbglvl, *comm); - ctrl.CoarsenTo = amin(vtxdist[npes]+1, 50*incon*amax(npes, inparts)); - ctrl.ipc_factor = 1000.0; - ctrl.redist_factor = 1.0; - ctrl.redist_base = 1.0; - ctrl.seed = (seed == 0) ? mype : seed*mype; - ctrl.sync = GlobalSEMax(&ctrl, seed); - ctrl.partType = REFINE_PARTITION; - ctrl.ps_relation = ps_relation; - ctrl.tpwgts = itpwgts; - - graph = Mc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag); - graph->vsize = idxsmalloc(graph->nvtxs, 1, "vsize"); - - graph->home = idxmalloc(graph->nvtxs, "home"); - if (ctrl.ps_relation == PARMETIS_PSR_COUPLED) - idxset(graph->nvtxs, mype, graph->home); + if (ctrl->ps_relation == PARMETIS_PSR_COUPLED) + iset(graph->nvtxs, mype, graph->home); else - idxcopy(graph->nvtxs, part, graph->home); + icopy(graph->nvtxs, part, graph->home); - tewgt = idxsum(graph->nedges, graph->adjwgt); - tvsize = idxsum(graph->nvtxs, graph->vsize); - gtewgt = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs; - gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs; - ctrl.edge_size_ratio = gtewgt/gtvsize; - scopy(incon, iubvec, ctrl.ubvec); - AllocateWSpace(&ctrl, graph, &wspace); + /* Allocate workspace */ + AllocateWSpace(ctrl, 10*graph->nvtxs); + - /***********************/ /* Partition and Remap */ - /***********************/ - IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); + STARTTIMER(ctrl, ctrl->TotalTmr); - Adaptive_Partition(&ctrl, graph, &wspace); - ParallelReMapGraph(&ctrl, graph, &wspace); + ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 50*(*ncon)*gk_max(npes, *nparts)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); + Adaptive_Partition(ctrl, 
graph); + ParallelReMapGraph(ctrl, graph); - idxcopy(graph->nvtxs, graph->where, part); - if (edgecut != NULL) - *edgecut = graph->mincut; + icopy(graph->nvtxs, graph->where, part); + *edgecut = graph->mincut; - /***********************/ - /* Take care of output */ - /***********************/ - IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); - IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); - - if (ctrl.dbglvl&DBG_INFO) { - Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout); - rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut); - avg = 0.0; - for (h=0; hgnpwgts[i*incon+h]/itpwgts[i*incon+h]); - avg += maximb; - rprintf(&ctrl, "%.3f ", maximb); - } - rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout); - } + STOPTIMER(ctrl, ctrl->TotalTmr); - /*************************************/ - /* Free memory, renumber, and return */ - /*************************************/ - GKfree((void **)&graph->lnpwgts, &graph->gnpwgts, &graph->nvwgt, &graph->home, - &graph->vsize, &itpwgts, LTERM); + /* Take care of output */ + IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); + IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); + IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, graph, 1)); - FreeInitialGraphAndRemap(graph, iwgtflag, 1); - FreeWSpace(&wspace); - FreeCtrl(&ctrl); + FreeInitialGraphAndRemap(graph); - if (inumflag == 1) + if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); - return; +DONE: + FreeCtrl(&ctrl); + if (gk_GetCurMemoryUsed() - curmem > 0) { + printf("ParMETIS appears to have a memory leak of %zdbytes. 
Report this.\n", + (ssize_t)(gk_GetCurMemoryUsed() - curmem)); + } + gk_malloc_cleanup(0); + + return (int)status; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/selectq.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/selectq.c index 2db824c5..e67fac3f 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/selectq.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/selectq.c @@ -8,85 +8,96 @@ * Started 7/28/97 * George * - * $Id: selectq.c,v 1.2 2003/07/21 17:18:53 karypis Exp $ + * $Id: selectq.c 10542 2011-07-11 16:56:22Z karypis $ */ #include -/************************************************************************* -* This stuff is hardcoded for up to four constraints -**************************************************************************/ -void Mc_DynamicSelectQueue(int nqueues, int ncon, int subdomain1, int subdomain2, - idxtype *currentq, float *flows, int *from, int *qnum, int minval, float avgvwgt, - float maxdiff) +/*************************************************************************/ +/*! 
This stuff is hardcoded for up to four constraints +*/ +/*************************************************************************/ +void Mc_DynamicSelectQueue(ctrl_t *ctrl, idx_t nqueues, idx_t ncon, idx_t subdomain1, + idx_t subdomain2, idx_t *currentq, real_t *flows, idx_t *from, idx_t *qnum, + idx_t minval, real_t avgvwgt, real_t maxdiff) { - int i, j; - int hash, index = -1, current; - int cand[MAXNCON], rank[MAXNCON], dont_cares[MAXNCON]; - int nperms, perm[24][5]; - float sign = 0.0; - KVType array[MAXNCON]; -int mype; -MPI_Comm_rank(MPI_COMM_WORLD, &mype); + idx_t i, j; + idx_t hash, index = -1, current; + idx_t *cand, *rank, *dont_cares; + idx_t nperms, perm[24][5]; + real_t sign = 0.0; + rkv_t *array; + idx_t mype; + gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); + + WCOREPUSH; *qnum = -1; + /* allocate memory */ + cand = iwspacemalloc(ctrl, ncon); + rank = iwspacemalloc(ctrl, ncon); + dont_cares = iwspacemalloc(ctrl, ncon); + array = rkvwspacemalloc(ctrl, ncon); + if (*from == -1) { for (i=0; iavgvwgt*MOC_GD_GRANULARITY_FACTOR) { + if (flows[array[ncon-1].val] > avgvwgt*MOC_GD_GRANULARITY_FACTOR) { *from = subdomain1; - sign = 1.0; + sign = 1.0; index = 0; } - if (flows[array[ncon-1].key]<-1.0*avgvwgt*MOC_GD_GRANULARITY_FACTOR) { + if (flows[array[ncon-1].val] < -1.0*avgvwgt*MOC_GD_GRANULARITY_FACTOR) { *from = subdomain2; - sign = -1.0; + sign = -1.0; index = nqueues; } - if (*from == -1) { - return; - } + if (*from == -1) + goto DONE; } else { - ASSERTS(*from == subdomain1 || *from == subdomain2); + ASSERT(*from == subdomain1 || *from == subdomain2); if (*from == subdomain1) { - sign = 1.0; + sign = 1.0; index = 0; } else { - sign = -1.0; + sign = -1.0; index = nqueues; } } for (i=0; i 0) { *qnum = hash; - return; + goto DONE; } } - return; +DONE: + WCOREPOP; } -/************************************************************************* -* This function sorts the nvwgts of a vertex and returns a hashed value 
-**************************************************************************/ -int Mc_HashVwgts(int ncon, float *nvwgt) +/*************************************************************************/ +/*! This function sorts the nvwgts of a vertex and returns a hashed value +*/ +/*************************************************************************/ +idx_t Mc_HashVwgts(ctrl_t *ctrl, idx_t ncon, real_t *nvwgt) { - int i; - int multiplier, retval; - int rank[MAXNCON]; - KVType array[MAXNCON]; + idx_t i; + idx_t multiplier, retval; + idx_t *rank; + rkv_t *array; + WCOREPUSH; + + rank = iwspacemalloc(ctrl, ncon); + array = rkvwspacemalloc(ctrl, ncon); for (i=0; invtxs; + ncon = graph->ncon; + xadj = graph->xadj; + nvwgt = graph->nvwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + where = graph->where; - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; + npwgts = rset(ncon*nparts, 0.0, graph->gnpwgts); + + PASSERT(ctrl, graph->ckrinfo != NULL); + PASSERT(ctrl, ctrl->cnbrpool != NULL); + + memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs); + cnbrpoolReset(ctrl); + + /*------------------------------------------------------------ + / Compute now the id/ed degrees + /------------------------------------------------------------*/ + nedges = mincut = 0; + for (i=0; ickrinfo+i; + + for (j=xadj[i]; jid += adjwgt[j]; + } + else { + myrinfo->ed += adjwgt[j]; + } + } + + mincut += myrinfo->ed; + + /* Time to compute the particular external degrees */ + if (myrinfo->ed > 0) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]); + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + for (j=xadj[i]; jnnbrs; k++) { + if (mynbrs[k].pid == other) { + mynbrs[k].ed += adjwgt[j]; + break; + } + } + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = other; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; + } + } + } + } + else { + myrinfo->inbr = -1; + } + } + + graph->mincut = mincut/2; + + return; +} + + 
+/************************************************************************* +* This function performs k-way refinement +**************************************************************************/ +void Mc_SerialKWayAdaptRefine(ctrl_t *ctrl, graph_t *graph, idx_t nparts, + idx_t *home, real_t *orgubvec, idx_t npasses) +{ + idx_t i, ii, iii, j, k; + idx_t nvtxs, ncon, pass, nmoves; + idx_t from, me, myhome, to, oldcut, gain, tmp; + idx_t *xadj, *adjncy, *adjwgt; + idx_t *where; + real_t *npwgts, *nvwgt, *minwgt, *maxwgt, *ubvec; + idx_t gain_is_greater, gain_is_same, fit_in_to, fit_in_from, going_home; + idx_t zero_gain, better_balance_ft, better_balance_tt; + ikv_t *cand; + idx_t mype; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + WCOREPUSH; + + gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - rinfo = graph->rinfo; + where = graph->where; npwgts = graph->gnpwgts; /* Setup the weight intervals of the various subdomains */ - cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); - minwgt = fmalloc(nparts*ncon, "minwgt"); - maxwgt = fmalloc(nparts*ncon, "maxwgt"); + cand = ikvwspacemalloc(ctrl, nvtxs); + minwgt = rwspacemalloc(ctrl, nparts*ncon); + maxwgt = rwspacemalloc(ctrl, nparts*ncon); + ubvec = rwspacemalloc(ctrl, ncon); ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec); for (i=0; imincut; for (i=0; ickrinfo[i].ed-graph->ckrinfo[i].id; cand[i].val = i; } - ikeysort(nvtxs, cand); + ikvsortd(nvtxs, cand); nmoves = 0; for (iii=0; iiickrinfo+i; if (myrinfo->ed >= myrinfo->id) { - from = where[i]; + from = where[i]; myhome = home[i]; - nvwgt = graph->nvwgt+i*ncon; + nvwgt = graph->nvwgt+i*ncon; if (myrinfo->id > 0 && - AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) + AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) continue; - mydegrees = myrinfo->degrees; - 
myndegrees = myrinfo->ndegrees; + mynbrs = ctrl->cnbrpool + myrinfo->inbr; - for (k=0; kid; + for (k=myrinfo->nnbrs-1; k>=0; k--) { + to = mynbrs[k].pid; + gain = mynbrs[k].ed - myrinfo->id; if (gain >= 0 && (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || IsHBalanceBetterFT(ncon,npwgts+from*ncon,npwgts+to*ncon,nvwgt,ubvec))) { @@ -96,19 +183,20 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); } /* break out if you did not find a candidate */ - if (k == myndegrees) + if (k < 0) continue; - for (j=k+1; j mydegrees[k].ewgt); - fit_in_to = AreAllHVwgtsBelow(ncon,1.0,npwgts+to*ncon,1.0,nvwgt,maxwgt+to*ncon); - better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon, - npwgts+to*ncon,nvwgt,ubvec); - better_balance_tt = IsHBalanceBetterTT(ncon,npwgts+mydegrees[k].edge*ncon, - npwgts+to*ncon,nvwgt,ubvec); + for (j=k-1; j>=0; j--) { + to = mynbrs[j].pid; + going_home = (myhome == to); + gain_is_same = (mynbrs[j].ed == mynbrs[k].ed); + gain_is_greater = (mynbrs[j].ed > mynbrs[k].ed); + fit_in_to = AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, + maxwgt+to*ncon); + better_balance_ft = IsHBalanceBetterFT(ncon, npwgts+from*ncon, + npwgts+to*ncon, nvwgt, ubvec); + better_balance_tt = IsHBalanceBetterTT(ncon, npwgts+mynbrs[k].pid*ncon, + npwgts+to*ncon,nvwgt,ubvec); if ( (gain_is_greater && @@ -129,14 +217,14 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); } } - to = mydegrees[k].edge; + to = mynbrs[k].pid; going_home = (myhome == to); - zero_gain = (mydegrees[k].ewgt == myrinfo->id); + zero_gain = (mynbrs[k].ed == myrinfo->id); - fit_in_from = AreAllHVwgtsBelow(ncon,1.0,npwgts+from*ncon,0.0,npwgts+from*ncon, - maxwgt+from*ncon); - better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon, - npwgts+to*ncon,nvwgt,ubvec); + fit_in_from = AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, + npwgts+from*ncon, maxwgt+from*ncon); + better_balance_ft = IsHBalanceBetterFT(ncon, npwgts+from*ncon, + npwgts+to*ncon, nvwgt, ubvec); if (zero_gain && 
!going_home && @@ -147,30 +235,31 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); /*===================================================================== * If we got here, we can now move the vertex from 'from' to 'to' *======================================================================*/ - graph->mincut -= mydegrees[k].ewgt-myrinfo->id; + graph->mincut -= mynbrs[k].ed-myrinfo->id; /* Update where, weight, and ID/ED information of the vertex you moved */ - saxpy2(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1); - saxpy2(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1); + raxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1); + raxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1); where[i] = to; - myrinfo->ed += myrinfo->id-mydegrees[k].ewgt; - SWAP(myrinfo->id, mydegrees[k].ewgt, tmp); + myrinfo->ed += myrinfo->id-mynbrs[k].ed; + gk_SWAP(myrinfo->id, mynbrs[k].ed, tmp); - if (mydegrees[k].ewgt == 0) { - myrinfo->ndegrees--; - mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge; - mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt; - } + if (mynbrs[k].ed == 0) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; else - mydegrees[k].edge = from; + mynbrs[k].pid = from; /* Update the degrees of adjacent vertices */ for (j=xadj[i]; jdegrees; + myrinfo = graph->ckrinfo+ii; + if (myrinfo->inbr == -1) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[ii+1]-xadj[ii]); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->cnbrpool + myrinfo->inbr; if (me == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); @@ -183,15 +272,12 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); /* Remove contribution of the ed from 'from' */ if (me != from) { - for (k=0; kndegrees; k++) { - if (mydegrees[k].edge == from) { - if (mydegrees[k].ewgt == adjwgt[j]) { - myrinfo->ndegrees--; - mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge; - mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt; - } + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == from) { + if (mynbrs[k].ed == adjwgt[j]) + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; else - 
mydegrees[k].ewgt -= adjwgt[j]; + mynbrs[k].ed -= adjwgt[j]; break; } } @@ -199,18 +285,18 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); /* Add contribution of the ed to 'to' */ if (me != to) { - for (k=0; kndegrees; k++) { - if (mydegrees[k].edge == to) { - mydegrees[k].ewgt += adjwgt[j]; + for (k=0; knnbrs; k++) { + if (mynbrs[k].pid == to) { + mynbrs[k].ed += adjwgt[j]; break; } } - if (k == myrinfo->ndegrees) { - mydegrees[myrinfo->ndegrees].edge = to; - mydegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; + if (k == myrinfo->nnbrs) { + mynbrs[k].pid = to; + mynbrs[k].ed = adjwgt[j]; + myrinfo->nnbrs++; } } - } nmoves++; } @@ -220,98 +306,21 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); break; } - GKfree((void **)&minwgt, (void **)&maxwgt, (void **)&cand, LTERM); + WCOREPOP; return; } -/************************************************************************* -* This function computes the initial id/ed -**************************************************************************/ -void Mc_ComputeSerialPartitionParams(GraphType *graph, int nparts, - EdgeType *degrees) -{ - int i, j, k; - int nvtxs, nedges, ncon, mincut, me, other; - idxtype *xadj, *adjncy, *adjwgt, *where; - RInfoType *rinfo, *myrinfo; - EdgeType *mydegrees; - float *nvwgt, *npwgts; -int mype; -MPI_Comm_rank(MPI_COMM_WORLD, &mype); - - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - rinfo = graph->rinfo; - - npwgts = sset(ncon*nparts, 0.0, graph->gnpwgts); - - /*------------------------------------------------------------ - / Compute now the id/ed degrees - /------------------------------------------------------------*/ - nedges = mincut = 0; - for (i=0; iid = myrinfo->ed = myrinfo->ndegrees = 0; - myrinfo->degrees = degrees + nedges; - nedges += xadj[i+1]-xadj[i]; - - for (j=xadj[i]; jid += adjwgt[j]; - } - else { - myrinfo->ed += adjwgt[j]; - } - } - - mincut += myrinfo->ed; - - /* Time to 
compute the particular external degrees */ - if (myrinfo->ed > 0) { - mydegrees = myrinfo->degrees; - - for (j=xadj[i]; jndegrees; k++) { - if (mydegrees[k].edge == other) { - mydegrees[k].ewgt += adjwgt[j]; - break; - } - } - if (k == myrinfo->ndegrees) { - mydegrees[myrinfo->ndegrees].edge = other; - mydegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; - } - } - } - } - } - - graph->mincut = mincut/2; - - return; -} - /************************************************************************* * This function checks if the vertex weights of two vertices are below * a given set of values **************************************************************************/ -int AreAllHVwgtsBelow(int ncon, float alpha, float *vwgt1, float beta, float *vwgt2, float *limit) +idx_t AreAllHVwgtsBelow(idx_t ncon, real_t alpha, real_t *vwgt1, real_t beta, + real_t *vwgt2, real_t *limit) { - int i; + idx_t i; for (i=0; i limit[i]) @@ -325,10 +334,10 @@ int AreAllHVwgtsBelow(int ncon, float alpha, float *vwgt1, float beta, float *vw * This function computes the load imbalance over all the constrains * For now assume that we just want balanced partitionings **************************************************************************/ -void ComputeHKWayLoadImbalance(int ncon, int nparts, float *npwgts, float *lbvec) +void ComputeHKWayLoadImbalance(idx_t ncon, idx_t nparts, real_t *npwgts, real_t *lbvec) { - int i, j; - float max; + idx_t i, j; + real_t max; for (i=0; invtxs; vsize = graph->vsize; - max_mult = amin(MAX_NPARTS_MULTIPLIER, nparts); + max_mult = gk_min(MAX_NPARTS_MULTIPLIER, nparts); - sortvtx = (KeyKeyValueType *)GKmalloc(nvtxs*sizeof(KeyKeyValueType), "sortvtx"); - flowto = (KeyValueType *)GKmalloc((nparts*max_mult+nparts)*sizeof(KeyValueType), "flowto"); - bestflow = flowto+nparts; - map = htable = idxsmalloc(nparts*2, -1, "htable"); + sortvtx = (i2kv_t *)wspacemalloc(ctrl, nvtxs*sizeof(i2kv_t)); + flowto = ikvwspacemalloc(ctrl, nparts); + bestflow = ikvwspacemalloc(ctrl, 
nparts*max_mult); + map = htable = iset(2*nparts, -1, iwspacemalloc(ctrl, 2*nparts)); rowmap = map+nparts; for (i=0; ikey1 > second->key1) return 1; @@ -507,47 +518,54 @@ int SSMIncKeyCmp(const void *fptr, const void *sptr) /************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ -void Mc_Serial_FM_2WayRefine(GraphType *graph, float *tpwgts, int npasses) +void Mc_Serial_FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts, idx_t npasses) { - int i, ii, j, k; - int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum; - idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; - idxtype *moved, *swaps, *qnum; - float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; - FPQueueType parts[MAXNCON][2]; - int higain, mincut, initcut, newcut, mincutorder; - float rtpwgts[MAXNCON*2]; - KeyValueType *cand; -int mype; -MPI_Comm_rank(MPI_COMM_WORLD, &mype); - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; + idx_t i, ii, j, k; + idx_t kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum; + idx_t *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; + idx_t *moved, *swaps, *qnum; + real_t *nvwgt, *npwgts, *mindiff, *tmpdiff, origbal, minbal, newbal; + rpq_t **parts[2]; + idx_t higain, mincut, initcut, newcut, mincutorder; + real_t *rtpwgts; + idx_t mype; + + WCOREPUSH; + + gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - id = graph->sendind; - ed = graph->recvind; + where = graph->where; + id = graph->sendind; + ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; - moved = idxmalloc(nvtxs, "moved"); - swaps = idxmalloc(nvtxs, 
"swaps"); - qnum = idxmalloc(nvtxs, "qnum"); - cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); + mindiff = rwspacemalloc(ctrl, ncon); + tmpdiff = rwspacemalloc(ctrl, ncon); + rtpwgts = rwspacemalloc(ctrl, 2*ncon); + parts[0] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); + parts[1] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); - limit = amin(amax(0.01*nvtxs, 25), 150); + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + qnum = iwspacemalloc(ctrl, nvtxs); + + limit = gk_min(gk_max(0.01*nvtxs, 25), 150); /* Initialize the queues */ for (i=0; ignvtxs; - - for (i=0; i limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); - saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); - saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); + raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); + raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } @@ -619,7 +631,7 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ - SWAP(id[higain], ed[higain], tmp); + gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); @@ -634,18 +646,18 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); if (ed[k] == 0) { /* Not a boundary vertex any more */ BNDDelete(nbnd, bndind, bndptr, k); if (moved[k] == -1) /* Remove it if in the queues */ - FPQueueDelete(&parts[qnum[k]][where[k]], k); + rpqDelete(parts[where[k]][qnum[k]], k); } else { /* If it has not been moved, update its position in the queue */ if (moved[k] == -1) - FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k])); + rpqUpdate(parts[where[k]][qnum[k]], k, (real_t)(ed[k]-id[k])); } } else { if (ed[k] > 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, 
bndind, bndptr, k); if (moved[k] == -1) - FPQueueInsert(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k])); + rpqInsert(parts[where[k]][qnum[k]], k, (real_t)(ed[k]-id[k])); } } } @@ -660,14 +672,14 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; - SWAP(id[higain], ed[higain], tmp); + gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); - saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); + raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); + raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j 0) { + if (rpqLength(queues[*from][i]) > 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; break; @@ -735,7 +749,7 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); } for (i++; i max && FPQueueGetQSize(&queues[i][*from]) > 0) { + if (npwgts[(*from)*ncon + i] > max && rpqLength(queues[*from][i]) > 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; } @@ -749,9 +763,9 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); for (part=0; part<2; part++) { for (i=0; i 0 && - FPQueueSeeMaxGain(&queues[i][part]) > maxgain) { - maxgain = FPQueueSeeMaxGain(&queues[i][part]); + if (rpqLength(queues[part][i]) > 0 && + rpqSeeTopKey(queues[part][i]) > maxgain) { + maxgain = rpqSeeTopKey(queues[part][i]); *from = part; *cnum = i; } @@ -762,30 +776,30 @@ MPI_Comm_rank(MPI_COMM_WORLD, &mype); return; } + /************************************************************************* * This function checks if the balance achieved is better than the diff * For now, it uses a 2-norm measure **************************************************************************/ -int Serial_BetterBalance(int ncon, float *npwgts, float *tpwgts, float *diff) +idx_t 
Serial_BetterBalance(idx_t ncon, real_t *npwgts, real_t *tpwgts, + real_t *diff, real_t *tmpdiff) { - int i; - float ndiff[MAXNCON]; + idx_t i; for (i=0; invtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; + idx_t i, ii, j, k, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum; + idx_t *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; + idx_t *moved, *swaps, *qnum; + real_t *nvwgt, *npwgts, *mindiff, *tmpdiff, origbal, minbal, newbal; + rpq_t **parts[2]; + idx_t higain, mincut, newcut, mincutorder; + idx_t *qsizes[2]; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; - id = graph->sendind; - ed = graph->recvind; + where = graph->where; + id = graph->sendind; + ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; - moved = idxmalloc(nvtxs, "moved"); - swaps = idxmalloc(nvtxs, "swaps"); - qnum = idxmalloc(nvtxs, "qnum"); - cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); + mindiff = rwspacemalloc(ctrl, ncon); + tmpdiff = rwspacemalloc(ctrl, ncon); + parts[0] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); + parts[1] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); + qsizes[0] = iset(ncon, 0, iwspacemalloc(ctrl, ncon)); + qsizes[1] = iset(ncon, 0, iwspacemalloc(ctrl, ncon)); + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + qnum = iwspacemalloc(ctrl, nvtxs); - limit = amin(amax(0.01*nvtxs, 15), 100); + limit = gk_min(gk_max(0.01*nvtxs, 15), 100); /* Initialize the queues */ for (i=0; i qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) { @@ -873,20 +892,12 @@ void Mc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor) newcut = mincut = graph->mincut; mincutorder = -1; - idxset(nvtxs, -1, moved); + iset(nvtxs, -1, moved); /* Insert all nodes in the priority queues */ 
nbnd = graph->gnvtxs; - for (i=0; i limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); - saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); - saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); + raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); + raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } @@ -926,7 +937,7 @@ void Mc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor) /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ - SWAP(id[higain], ed[higain], tmp); + gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) @@ -940,7 +951,7 @@ void Mc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor) /* Update the queue position */ if (moved[k] == -1) - FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k])); + rpqUpdate(parts[where[k]][qnum[k]], k, (real_t)(ed[k]-id[k])); /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) @@ -958,14 +969,14 @@ void Mc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor) higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; - SWAP(id[higain], ed[higain], tmp); + gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); - saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); - saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); + raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); + raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; jnvtxs; - 
ncon = graph->ncon; - xadj = graph->xadj; + idx_t i, ii, j, k; + idx_t kwgt, nvtxs, nbnd, ncon, nswaps, from, to, cnum, tmp; + idx_t *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; + idx_t *qnum; + real_t *nvwgt, *npwgts; + rpq_t **parts[2]; + idx_t higain, mincut; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; adjncy = graph->adjncy; - nvwgt = graph->nvwgt; + nvwgt = graph->nvwgt; adjwgt = graph->adjwgt; - where = graph->where; - id = graph->sendind; - ed = graph->recvind; + where = graph->where; + id = graph->sendind; + ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; - qnum = idxmalloc(nvtxs, "qnum"); - cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); + parts[0] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); + parts[1] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); + + qnum = iwspacemalloc(ctrl, nvtxs); /* This is called for initial partitioning so we know from where to pick nodes */ from = 1; to = (from+1)%2; for (i=0; i 0) - FPQueueInsert(&parts[qnum[i]][0], i, (float)(ed[i]-id[i])); + rpqInsert(parts[0][qnum[i]], i, (real_t)(ed[i]-id[i])); else - FPQueueInsert(&parts[qnum[i]][1], i, (float)(ed[i]-id[i])); + rpqInsert(parts[1][qnum[i]], i, (real_t)(ed[i]-id[i])); } } mincut = graph->mincut; - nbnd = graph->gnvtxs; + nbnd = graph->gnvtxs; for (nswaps=0; nswaps 0 && bndptr[higain] == -1) @@ -1094,11 +1104,11 @@ void Mc_Serial_Init2WayBalance(GraphType *graph, float *tpwgts) /* Update the queue position */ if (where[k] == from) { if (ed[k] > 0 && bndptr[k] == -1) { /* It moves in boundary */ - FPQueueDelete(&parts[qnum[k]][1], k); - FPQueueInsert(&parts[qnum[k]][0], k, (float)(ed[k]-id[k])); + rpqDelete(parts[1][qnum[k]], k); + rpqInsert(parts[0][qnum[k]], k, (real_t)(ed[k]-id[k])); } else { /* It must be in the boundary already */ - FPQueueUpdate(&parts[qnum[k]][0], k, (float)(ed[k]-id[k])); + rpqUpdate(parts[0][qnum[k]], k, 
(real_t)(ed[k]-id[k])); } } @@ -1114,11 +1124,11 @@ void Mc_Serial_Init2WayBalance(GraphType *graph, float *tpwgts) graph->gnvtxs = nbnd; for (i=0; i= max && - FPQueueGetQSize(&queues[i][0]) + FPQueueGetQSize(&queues[i][1]) > 0) { + rpqLength(queues[0][i]) + rpqLength(queues[1][i]) > 0) { max = npwgts[from*ncon+i]-tpwgts[i]; cnum = i; } @@ -1147,26 +1157,26 @@ int Serial_SelectQueueOneWay(int ncon, float *npwgts, float *tpwgts, int from, /************************************************************************* * This function computes the initial id/ed **************************************************************************/ -void Mc_Serial_Compute2WayPartitionParams(GraphType *graph) +void Mc_Serial_Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *graph) { - int i, j, me, nvtxs, ncon, nbnd, mincut; - idxtype *xadj, *adjncy, *adjwgt; - float *nvwgt, *npwgts; - idxtype *id, *ed, *where; - idxtype *bndptr, *bndind; - - nvtxs = graph->nvtxs; - ncon = graph->ncon; - xadj = graph->xadj; - nvwgt = graph->nvwgt; + idx_t i, j, me, nvtxs, ncon, nbnd, mincut; + idx_t *xadj, *adjncy, *adjwgt; + real_t *nvwgt, *npwgts; + idx_t *id, *ed, *where; + idx_t *bndptr, *bndind; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; - where = graph->where; + where = graph->where; - npwgts = sset(2*ncon, 0.0, graph->gnpwgts); - id = idxset(nvtxs, 0, graph->sendind); - ed = idxset(nvtxs, 0, graph->recvind); - bndptr = idxset(nvtxs, -1, graph->sendptr); + npwgts = rset(2*ncon, 0.0, graph->gnpwgts); + id = iset(nvtxs, 0, graph->sendind); + ed = iset(nvtxs, 0, graph->recvind); + bndptr = iset(nvtxs, -1, graph->sendptr); bndind = graph->recvptr; /*------------------------------------------------------------ @@ -1175,7 +1185,7 @@ void Mc_Serial_Compute2WayPartitionParams(GraphType *graph) nbnd = mincut = 0; for (i=0; i 0 || xadj[i] == xadj[i+1]) { mincut += ed[i]; - bndptr[i] = nbnd; - bndind[nbnd++] = 
i; + BNDInsert(nbnd, bndind, bndptr, i); } } @@ -1196,13 +1205,14 @@ void Mc_Serial_Compute2WayPartitionParams(GraphType *graph) } + /************************************************************************* * This function checks if the vertex weights of two vertices are below * a given set of values **************************************************************************/ -int Serial_AreAnyVwgtsBelow(int ncon, float alpha, float *vwgt1, float beta, float *vwgt2, float *limit) +idx_t Serial_AreAnyVwgtsBelow(idx_t ncon, real_t alpha, real_t *vwgt1, real_t beta, real_t *vwgt2, real_t *limit) { - int i; + idx_t i; for (i=0; invtxs; i++) { for (j=graph->xadj[i]; jxadj[i+1]; j++) @@ -1230,13 +1240,14 @@ int ComputeSerialEdgeCut(GraphType *graph) return graph->mincut; } + /************************************************************************* * This function computes the TotalV of a serial graph. **************************************************************************/ -int ComputeSerialTotalV(GraphType *graph, idxtype *home) +idx_t ComputeSerialTotalV(graph_t *graph, idx_t *home) { - int i; - int totalv = 0; + idx_t i; + idx_t totalv = 0; for (i=0; invtxs; i++) if (graph->where[i] != home[i]) diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/setup.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/setup.c deleted file mode 100644 index 86b25558..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/setup.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * setup.c - * - * This file contains functions that setup the various communication - * data structures for parallel KWAY - * - * Started 2/21/96 - * George - * - * $Id: setup.c,v 1.3 2003/07/31 16:23:30 karypis Exp $ - * - */ - - -#include - -#define DEBUG_SETUPINFO_ - - -/*************************************************************************/ -/*! 
This function performs the following functions: - - determines the processors that contain adjacent vertices and setup - the infrastructure for efficient communication. - - localizes the numbering of the adjancency lists. -*/ -/**************************************************************************/ -void SetUp(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) -{ - int i, j, k, islocal, penum, gnvtxs, nvtxs, nlocal, firstvtx, lastvtx, nsend, nrecv, nnbrs, nadj; - int npes=ctrl->npes, mype=ctrl->mype; - idxtype *vtxdist, *xadj, *adjncy; - idxtype *peind, *recvptr, *recvind, *sendptr, *sendind; - idxtype *receive, *pemap, *imap, *lperm; - idxtype *pexadj, *peadjncy, *peadjloc, *startsind; - KeyValueType *recvrequests, *sendrequests, *adjpairs; - - if (graph->lperm != NULL) - return; /* The communication structure has already been setup */ - - IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); - IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SetupTmr)); - - /* See if there is a need to adjust the memory allocated for the wspace */ - AdjustWSpace(ctrl, graph, wspace); - - gnvtxs = graph->gnvtxs; - nvtxs = graph->nvtxs; - vtxdist = graph->vtxdist; - xadj = graph->xadj; - adjncy = graph->adjncy; - - firstvtx = vtxdist[mype]; - lastvtx = vtxdist[mype+1]; - - pemap = wspace->pv1; - idxset(npes, -1, pemap); - - lperm = graph->lperm = idxmalloc(nvtxs, "SetUp: graph->lperm"); - for (i=0; iindices; /* Use the large global received array for now */ - adjpairs = wspace->pairs; - - for (nlocal = nadj = i = 0; i= firstvtx && k < lastvtx) { - /* local vertex */ - adjncy[j] = k-firstvtx; - } - else { - /* remote vertex */ - adjpairs[nadj].key = k; - adjpairs[nadj++].val = j; - islocal = 0; - } - } - if (islocal) { - lperm[i] = lperm[nlocal]; - lperm[nlocal++] = i; - } - } - - /* Take care the received part now */ - ikeysort(nadj, adjpairs); - adjpairs[nadj].key = gnvtxs+1; /* Boundary condition */ - for (nrecv=i=0; ipeind = idxmalloc(npes, "SetUp: peind"); - recvptr = 
graph->recvptr = idxmalloc(npes+1, "SetUp: recvptr"); - recvind = graph->recvind = idxmalloc(nrecv, "SetUp: recvind"); - - /* Take care of the received portion */ - idxcopy(nrecv, receive, recvind); /* Copy the vertices to be received into recvind */ - - i = nnbrs = recvptr[0] = 0; - for (penum=0; penum= vtxdist[penum+1]) - break; - } - if (j > i) { - peind[nnbrs] = penum; - recvptr[++nnbrs] = j; - i = j; - } - } - /* PrintVector(ctrl, nnbrs+1, 0, recvptr, "recvptr"); */ - - - /************************************************************* - * Determine what you need to send - *************************************************************/ - /* Tell the other processors what they need to send you */ - recvrequests = wspace->pepairs1; - sendrequests = wspace->pepairs2; - for (i=0; icomm); - - /* PrintPairs(ctrl, npes, recvrequests, "recvrequests"); */ - /* PrintPairs(ctrl, npes, sendrequests, "sendrequests"); */ - - sendptr = graph->sendptr = idxmalloc(npes+1, "SetUp: sendptr"); - startsind = wspace->pv2; - for (j=i=0; i 0) { - sendptr[j] = sendrequests[i].key; - startsind[j] = sendrequests[i].val; - j++; - } - } - - ASSERT(ctrl, j == nnbrs); - MAKECSR(i, nnbrs, sendptr); - - nsend = sendptr[nnbrs]; - sendind = graph->sendind = idxmalloc(nsend, "SetUp: sendind"); - - - /* Issue the receives for sendind */ - for (i=0; icomm, ctrl->rreq+i); - } - - /* Issue the sends. 
My recvind[penum] becomes penum's sendind[mype] */ - for (i=0; icomm, ctrl->sreq+i); - } - - MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); - MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); - - - - /* Create the peadjncy data structure for sparse boundary exchanges */ - pexadj = graph->pexadj = idxsmalloc(nvtxs+1, 0, "SetUp: pexadj"); - peadjncy = graph->peadjncy = idxmalloc(nsend, "SetUp: peadjncy"); - peadjloc = graph->peadjloc = idxmalloc(nsend, "SetUp: peadjloc"); - - for (i=0; i= firstvtx && sendind[i] < lastvtx, - (ctrl, "%d %d %d\n", sendind[i], firstvtx, lastvtx)); - pexadj[sendind[i]-firstvtx]++; - } - MAKECSR(i, nvtxs, pexadj); - - for (i=0; i0; i--) - pexadj[i] = pexadj[i-1]; - pexadj[0] = 0; - - - graph->nnbrs = nnbrs; - graph->nrecv = nrecv; - graph->nsend = nsend; - graph->nlocal = nlocal; - - - /* Create the inverse map from ladjncy to adjncy */ - imap = graph->imap = idxmalloc(nvtxs+nrecv, "SetUp: imap"); - for (i=0; idbglvl, DBG_TIME, stoptimer(ctrl->SetupTmr)); - -#ifdef DEBUG_SETUPINFO - rprintf(ctrl, "[%5d %5d] \tl:[%5d %5d] \ts:[%5d, %5d] \tr:[%5d, %5d]\n", - GlobalSEMin(ctrl, nvtxs), GlobalSEMax(ctrl, nvtxs), - GlobalSEMin(ctrl, nlocal), GlobalSEMax(ctrl, nlocal), - GlobalSEMin(ctrl, nsend), GlobalSEMax(ctrl, nsend), - GlobalSEMin(ctrl, nrecv), GlobalSEMax(ctrl, nrecv)); - - PrintSetUpInfo(ctrl, graph); -#endif -} - - diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/stat.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/stat.c index 6647008c..80373f02 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/stat.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/stat.c @@ -8,7 +8,7 @@ * Started 7/25/97 * George * - * $Id: stat.c,v 1.3 2003/07/23 00:54:56 karypis Exp $ + * $Id: stat.c 10578 2011-07-14 18:10:15Z karypis $ * */ @@ -19,11 +19,11 @@ /************************************************************************* * This function 
computes the balance of the partitioning **************************************************************************/ -void Mc_ComputeSerialBalance(CtrlType *ctrl, GraphType *graph, idxtype *where, float *ubvec) +void ComputeSerialBalance(ctrl_t *ctrl, graph_t *graph, idx_t *where, real_t *ubvec) { - int i, j, nvtxs, ncon, nparts; - idxtype *pwgts, *tvwgts, *vwgt; - float *tpwgts, maximb; + idx_t i, j, nvtxs, ncon, nparts; + idx_t *pwgts, *tvwgts, *vwgt; + real_t *tpwgts, maximb; nvtxs = graph->nvtxs; ncon = graph->ncon; @@ -31,8 +31,8 @@ void Mc_ComputeSerialBalance(CtrlType *ctrl, GraphType *graph, idxtype *where, f nparts = ctrl->nparts; tpwgts = ctrl->tpwgts; - pwgts = idxsmalloc(nparts*ncon, 0, "pwgts"); - tvwgts = idxsmalloc(ncon, 0, "tvwgts"); + pwgts = ismalloc(nparts*ncon, 0, "pwgts"); + tvwgts = ismalloc(ncon, 0, "tvwgts"); for (i=0; invtxs; i++) { for (j=0; jncon; nvtxs = graph->nvtxs; @@ -69,10 +70,10 @@ void Mc_ComputeParallelBalance(CtrlType *ctrl, GraphType *graph, idxtype *where, nparts = ctrl->nparts; tpwgts = ctrl->tpwgts; - lnpwgts = fmalloc(nparts*ncon, "CPB: lnpwgts"); - gnpwgts = fmalloc(nparts*ncon, "CPB: gnpwgts"); - sset(nparts*ncon, 0.0, lnpwgts); - sset(ncon, 1.0, lminvwgts); + lminvwgts = rset(ncon, 1.0, rwspacemalloc(ctrl, ncon)); + gminvwgts = rwspacemalloc(ctrl, ncon); + lnpwgts = rset(nparts*ncon, 0.0, rwspacemalloc(ctrl, nparts*ncon)); + gnpwgts = rwspacemalloc(ctrl, nparts*ncon); for (i=0; icomm); - MPI_Allreduce((void *)(lminvwgts), (void *)(gminvwgts), ncon, MPI_FLOAT, MPI_MIN, ctrl->comm); + gkMPI_Allreduce((void *)(lnpwgts), (void *)(gnpwgts), nparts*ncon, REAL_T, MPI_SUM, ctrl->comm); + gkMPI_Allreduce((void *)(lminvwgts), (void *)(gminvwgts), ncon, REAL_T, MPI_MIN, ctrl->comm); /* The +gminvwgts[j] in the following code is to deal with bad cases of tpwgts[i*ncon+j] == 0 */ for (j=0; jnpes; i++) { if (i == ctrl->mype) { for (j=0; jnpes; j++) - printf("%.3f ", matrix[j]); + printf("%.3"PRREAL" ", matrix[j]); printf("\n"); 
fflush(stdout); } - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); } if (ctrl->mype == 0) { printf("****************************\n"); fflush(stdout); } - MPI_Barrier(ctrl->comm); + gkMPI_Barrier(ctrl->comm); return; } -/************************************************************************* -* This function computes stats for refinement -**************************************************************************/ -void Mc_ComputeRefineStats(CtrlType *ctrl, GraphType *graph, float *ubvec) +/***********************************************************************************/ +/*! This function prints post-partitioning information + */ +/***********************************************************************************/ +void PrintPostPartInfo(ctrl_t *ctrl, graph_t *graph, idx_t movestats) { - int h, i, j, k; - int nvtxs, ncon; - idxtype *xadj, *adjncy, *adjwgt, *where; - float *nvwgt, *lnpwgts, *gnpwgts; - RInfoType *rinfo; - int mype = ctrl->mype, nparts = ctrl->nparts; - idxtype *gborder, *border, *gfrom, *from, *gto, *to, *connect, *gconnect; - idxtype gain[20] = {0}, ggain[20]; - int lnborders, gnborders; - int bestgain, pmoves, gpmoves, other; - float tpwgts[MAXNCON], badmaxpwgt[MAXNCON]; - int HIST_FACTOR = graph->level + 1; + idx_t i, j, ncon, nmoved, maxin, maxout, nparts; + real_t maximb, *tpwgts; - nvtxs = graph->nvtxs; ncon = graph->ncon; - xadj = graph->xadj; - adjncy = graph->adjncy; - adjwgt = graph->adjwgt; - where = graph->where; - lnpwgts = graph->lnpwgts; - gnpwgts = graph->gnpwgts; - rinfo = graph->rinfo; - - connect = idxsmalloc(nparts*nparts, 0, "CRS: connect"); - gconnect = idxmalloc(nparts*nparts, "CRS: gconnect"); - border = idxsmalloc(nparts, 0, "CRS: border"); - gborder = idxmalloc(nparts, "CRS: gborder"); - from = idxsmalloc(nparts, 0, "CRS: from"); - gfrom = idxmalloc(nparts, "CRS: gfrom"); - to = idxsmalloc(nparts, 0, "CRS: to"); - gto = idxmalloc(nparts, "CRS: gto"); - - for (h=0; hnparts; + tpwgts = ctrl->tpwgts; - 
/***************************************/ - for (i=0; imincut); - for (i=0; ignpwgts[i*ncon+j]/tpwgts[i*ncon+j]); + rprintf(ctrl, "%.3"PRREAL" ", maximb); } - MPI_Reduce((void *)connect, (void *)gconnect, nparts*nparts, IDX_DATATYPE, MPI_MAX, 0, ctrl->comm); - if (mype == 0) { - printf("connectivity\n"); - for (i=0; i 0) { - lnborders++; - border[where[i]]++; - } - - MPI_Reduce((void *)border, (void *)gborder, nparts, IDX_DATATYPE, MPI_SUM, 0, ctrl->comm); - gnborders = GlobalSESum(ctrl, lnborders); - if (mype == 0) { - printf("number of borders: %d\n", gnborders); - for (i=0; invwgt+i*ncon; - - for (j=0; j badmaxpwgt[h]) - break; - if (h == ncon) - break; - } - if (j < rinfo[i].ndegrees) { - pmoves++; - from[where[i]]++; - to[other]++; - for (k=j+1; k badmaxpwgt[h]) - break; - - if (h == ncon) { - pmoves++; - from[where[i]]++; - to[other]++; - } - } - } - } +/************************************************************************* +* This function computes movement statistics for adaptive refinement +* schemes +**************************************************************************/ +void ComputeMoveStatistics(ctrl_t *ctrl, graph_t *graph, idx_t *nmoved, idx_t *maxin, idx_t *maxout) +{ + idx_t i, j, nvtxs; + idx_t *vwgt, *where; + idx_t *lpvtxs, *gpvtxs; - gpmoves = GlobalSESum(ctrl, pmoves); - MPI_Reduce((void *)from, (void *)gfrom, nparts, IDX_DATATYPE, MPI_SUM, 0, ctrl->comm); - MPI_Reduce((void *)to, (void *)gto, nparts, IDX_DATATYPE, MPI_SUM, 0, ctrl->comm); + nvtxs = graph->nvtxs; + vwgt = graph->vwgt; + where = graph->where; - if (mype == 0) { - printf("possible moves: %d\n", gpmoves); - printf("from "); - for (i=0; inparts, 0, "ComputeMoveStatistics: lpvtxs"); + gpvtxs = ismalloc(ctrl->nparts, 0, "ComputeMoveStatistics: gpvtxs"); - /***************************************/ - for (i=0; i 0) { - bestgain = rinfo[i].degrees[0].ewgt-rinfo[i].id; - for (j=0; j= 10) { - gain[19]++; - continue; - } - - if (bestgain / HIST_FACTOR < -10) { - gain[0]++; - 
continue; - } - - gain[(bestgain/HIST_FACTOR)+10]++; - } + for (j=i=0; imype) + j++; } - MPI_Reduce((void *)gain, (void *)ggain, 20, IDX_DATATYPE, MPI_SUM, 0, ctrl->comm); - if (mype == 0) { - printf("gain histogram (buckets of %d)\n", HIST_FACTOR); - for (i=0; i<20; i++) { - if (i == 10 || i == 11) - printf(" "); - printf("%d ", ggain[i]); - } - printf("\n\n"); - } + /* PrintVector(ctrl, ctrl->npes, 0, lpvtxs, "Lpvtxs: "); */ + gkMPI_Allreduce((void *)lpvtxs, (void *)gpvtxs, ctrl->nparts, IDX_T, MPI_SUM, ctrl->comm); + *nmoved = GlobalSESum(ctrl, j); + *maxout = GlobalSEMax(ctrl, j); + *maxin = GlobalSEMax(ctrl, gpvtxs[ctrl->mype]-(nvtxs-j)); + gk_free((void **)&lpvtxs, (void **)&gpvtxs, LTERM); +} - /***************************************/ - if (mype == 0) printf("******************************\n"); - if (mype == 0) printf("******************************\n"); - GKfree((void **)&gconnect, (void **)&connect, (void **)&gborder, (void **)&border, (void **)&gfrom, (void **)&from, (void **)>o, (void **)&to, LTERM); - return; -} diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/stdheaders.h b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/stdheaders.h deleted file mode 100644 index 536caf26..00000000 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/stdheaders.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 1997, Regents of the University of Minnesota - * - * stdheaders.h - * - * This file includes all necessary header files - * - * Started 8/27/94 - * George - * - * $Id: stdheaders.h,v 1.4 2003/07/25 14:31:47 karypis Exp $ - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/struct.h b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/struct.h index bb2e2529..6d5add48 100644 --- 
a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/struct.h +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/struct.h @@ -8,137 +8,41 @@ * Started 9/26/95 * George * - * $Id: struct.h,v 1.2 2003/07/21 17:50:22 karypis Exp $ + * $Id: struct.h 10592 2011-07-16 21:17:53Z karypis $ */ -/* Indexes are as long as integers for now */ -#ifdef IDXTYPE_INT -#define IDX_DATATYPE MPI_INT -#define MAX_INT INT_MAX -#define MIN_INT INT_MIN -#else -#define IDX_DATATYPE MPI_SHORT -#define MAX_INT SHRT_MAX -#define MIN_INT SHRT_MIN -#endif +/*************************************************************************/ +/*! This data structure stores cut-based k-way refinement info about an + * adjacent subdomain for a given vertex. */ +/*************************************************************************/ +typedef struct cnbr_t { + idx_t pid; /*!< The partition ID */ + idx_t ed; /*!< The sum of the weights of the adjacent edges + that are incident on pid */ +} cnbr_t; -/************************************************************************* -* The following data structure stores key-value pair -**************************************************************************/ -struct KeyValueType { - idxtype key; - idxtype val; -}; - -typedef struct KeyValueType KeyValueType; - -/************************************************************************* -* The following data structure stores key-value pair -**************************************************************************/ -struct KVType { - int key; - float val; -}; - -typedef struct KVType KVType; - - -/************************************************************************* -* The following data structure stores key-value pair -**************************************************************************/ -struct FKeyValueType { - float key; - idxtype val; -}; - -typedef struct FKeyValueType FKeyValueType; /************************************************************************* 
* The following data structure stores key-key-value triplets **************************************************************************/ -struct KeyKeyValueType { - idxtype key1, key2; - idxtype val; -}; - -typedef struct KeyKeyValueType KeyKeyValueType; - -/************************************************************************* -* The following data structure is used to store the buckets for the -* refinment algorithms -**************************************************************************/ -struct PQueueType { - int nnodes; - int maxnnodes; - idxtype *perm, *iperm, *values; - /* iperm[i] stores where the ith entry is located - perm[i] stores the entry that is located in the ith position */ -}; - -typedef struct PQueueType PQueueType; - - -/************************************************************************* -* The following data structure is used to store the buckets for the -* refinment algorithms -**************************************************************************/ -struct FPQueueType { - int type; /* The type of the representation used */ - int nnodes; - int maxnodes; - - /* Heap version of the data structure */ - FKeyValueType *heap; - idxtype *locator; -}; - -typedef struct FPQueueType FPQueueType; - -/************************************************************************* -* The following data structure stores an edge -**************************************************************************/ -struct edgedef { - idxtype edge; - idxtype ewgt; -}; -typedef struct edgedef EdgeType; - - -/************************************************************************* -* This data structure holds various working space data -**************************************************************************/ -struct workspacedef { - idxtype *core; /* Where pairs, indices, and degrees are coming from */ - int maxcore; - - int nlarge; /* The size of 'Large' */ - int nparts, npes; /* The size of the different p* vectors */ - - KeyValueType *pairs; /* Large 
pair array used during setup */ - idxtype *indices; /* Large array of indxtype used for various purposes */ - - /* Auxiliary parameters */ - idxtype *pv1, *pv2, *pv3, *pv4; /* Vectors of npes+1 size used in various places */ - KeyValueType *pepairs1, *pepairs2; - - EdgeType *degrees; -}; - -typedef struct workspacedef WorkSpaceType; +typedef struct i2kv_t { + idx_t key1, key2; + idx_t val; +} i2kv_t; /************************************************************************* * The following data structure holds information on degrees for k-way * partition **************************************************************************/ -struct rinfodef { - int id, ed; /* ID/ED of edges */ - int ndegrees; /* The number of different ext-degrees */ - EdgeType *degrees; /* List of edges */ -}; - -typedef struct rinfodef RInfoType; +typedef struct ckrinfo_t { + idx_t id; /*!< The internal degree of a vertex (sum of weights) */ + idx_t ed; /*!< The total external degree of a vertex */ + idx_t nnbrs; /*!< The number of neighboring subdomains */ + idx_t inbr; /*!< The index in the cnbr_t array where the nnbrs list + of neighbors is stored */ +} ckrinfo_t; /************************************************************************* @@ -146,7 +50,7 @@ typedef struct rinfodef RInfoType; * partition **************************************************************************/ struct nrinfodef { - int edegrees[2]; + idx_t edegrees[2]; }; typedef struct nrinfodef NRInfoType; @@ -156,78 +60,110 @@ typedef struct nrinfodef NRInfoType; * The following data structure stores a sparse matrix in CSR format * The diagonal entry is in the first position of each row. 
**************************************************************************/ -struct matrixdef { - int nrows, nnzs; /* Number of rows and nonzeros in the matrix */ - idxtype *rowptr; - idxtype *colind; - float *values; - float *transfer; -}; - -typedef struct matrixdef MatrixType; +typedef struct matrix_t { + idx_t nrows, nnzs; /* Number of rows and nonzeros in the matrix */ + idx_t *rowptr; + idx_t *colind; + real_t *values; + real_t *transfer; +} matrix_t; /************************************************************************* * This data structure holds the input graph **************************************************************************/ -struct graphdef { - int gnvtxs, nvtxs, nedges, ncon, nobj; - idxtype *xadj; /* Pointers to the locally stored vertices */ - idxtype *vwgt; /* Vertex weights */ - float *nvwgt; /* Vertex weights */ - idxtype *vsize; /* Vertex size */ - idxtype *adjncy; /* Array that stores the adjacency lists of nvtxs */ - idxtype *adjwgt; /* Array that stores the weights of the adjacency lists */ - idxtype *vtxdist; /* Distribution of vertices */ - idxtype *home; /* The initial partition of the vertex */ +typedef struct graph_t { + idx_t gnvtxs, nvtxs, nedges, ncon, nobj; + idx_t *xadj; /* Pointers to the locally stored vertices */ + idx_t *vwgt; /* Vertex weights */ + real_t *nvwgt; /* Vertex weights */ + idx_t *vsize; /* Vertex size */ + idx_t *adjncy; /* Array that stores the adjacency lists of nvtxs */ + idx_t *adjwgt; /* Array that stores the weights of the adjacency lists */ + idx_t *vtxdist; /* Distribution of vertices */ + idx_t *home; /* The initial partition of the vertex */ + + /* used for not freeing application supplied arrays */ + idx_t free_xadj; + idx_t free_adjncy; + idx_t free_vwgt; + idx_t free_adjwgt; + idx_t free_vsize; /* Coarsening structures */ - idxtype *match; - idxtype *cmap; + idx_t *match; + idx_t *cmap; + + /* Dropedges */ + idx_t *unmatched; /* used to mark the coarse vertices that resulted from match[u]=u 
*/ /* Used during initial partitioning */ - idxtype *label; + idx_t *label; /* Communication/Setup parameters */ - int nnbrs, nrecv, nsend; /* The number of neighboring processors */ - idxtype *peind; /* Array of size nnbrs storing the neighboring PEs */ - idxtype *sendptr, *sendind; /* CSR format of the vertices that are sent */ - idxtype *recvptr, *recvind; /* CSR format of the vertices that are received */ - idxtype *imap; /* The inverse map of local to global indices */ - idxtype *pexadj, *peadjncy, - *peadjloc; /* CSR format of the PEs each vertex is adjancent to */ - - int nlocal; /* Number of interior vertices */ - idxtype *lperm; /* lperm[0:nlocal] points to interior vertices, the rest are interface */ + idx_t nnbrs; /*!< The number of neighboring processors */ + idx_t nrecv; /*!< The total number of remote vertices that need to + be received. nrecv == recvptr[nnbrs] */ + idx_t nsend; /*!< The total number of local vertices that need to + be sent. This corresponds to the communication + volume of each pe, in the sense that if a vertex + needs to be sent multiple times, it is accounted + in nsend. nsend == sendptr[nnbrs] */ + idx_t *peind; /*!< Array of size nnbrs storing the neighboring PEs */ + idx_t *sendptr, *sendind; /*!< CSR format of the vertices that are sent to each + of the neighboring processors */ + idx_t *recvptr, *recvind; /*!< CSR format of the vertices that are received from + each of the neighboring PEs. */ + idx_t *imap; /*!< The inverse map of local to global indices */ + idx_t *pexadj, *peadjncy, + *peadjloc; /*!< CSR format of the PEs each vertex is adjancent to + along with the location in the sendind of the + non-local adjancent vertices */ + + idx_t nlocal; /*!< Number of interior vertices */ + idx_t *lperm; /*!< lperm[0:nlocal] points to interior vertices, + the rest are interface */ /* Communication parameters for projecting the partition. 
* These are computed during CreateCoarseGraph and used during projection * Note that during projection, the meaning of received and sent is reversed! */ - idxtype *rlens, *slens; /* Arrays of size nnbrs of how many vertices you are sending and receiving */ - KeyValueType *rcand; + idx_t *rlens, *slens; /* Arrays of size nnbrs of how many vertices you are sending and receiving */ + ikv_t *rcand; /* Partition parameters */ - idxtype *where; - idxtype *lpwgts, *gpwgts; - float *lnpwgts, *gnpwgts; - RInfoType *rinfo; + idx_t *where; + idx_t *lpwgts, *gpwgts; + real_t *lnpwgts, *gnpwgts; + ckrinfo_t *ckrinfo; + /* Node refinement information */ - int nsep; /* The number of vertices in the separator */ + idx_t nsep; /* The number of vertices in the separator */ NRInfoType *nrinfo; - idxtype *sepind; /* The indices of the vertices in the separator */ + idx_t *sepind; /* The indices of the vertices in the separator */ - int lmincut, mincut; + /* Vertex/edge metadata information use by DistDGL */ + size_t emdata_size, vmdata_size; + idx_t *vmptr, *emptr; + char *vmdata, *emdata; + idx_t *vtype; - int level; - int match_type; - int edgewgt_type; - struct graphdef *coarser, *finer; -}; + /* Various fields for out-of-core processing */ + int gID; + int ondisk; + + + idx_t lmincut, mincut; + + idx_t level; + idx_t match_type; + idx_t edgewgt_type; + + struct graph_t *coarser, *finer; +} graph_t; -typedef struct graphdef GraphType; /************************************************************************* @@ -239,64 +175,89 @@ typedef double timer; /************************************************************************* * The following structure stores information used by parallel kmetis **************************************************************************/ -struct controldef { - int mype, npes; /* Info about the parallel system */ - int CoarsenTo; /* The # of vertices in the coarsest graph */ - int dbglvl; /* Controls the debuging output of the program */ - int nparts; 
/* The number of partitions */ - int foldf; /* What is the folding factor */ - int mtype; /* The matching type */ - int ipart; /* The initial partitioning type */ - int rtype; /* The refinement type */ - int xyztype; /* The coordinate indexing type */ - int p_nseps; /* The number of separators to compute at each +typedef struct ctrl_t { + pmoptype_et optype; /*!< The operation being performed */ + idx_t mype, npes; /*!< Info about the parallel system */ + idx_t ncon; /*!< The number of balancing constraints */ + idx_t CoarsenTo; /*!< The # of vertices in the coarsest graph */ + idx_t dbglvl; /*!< Controls the debuging output of the program */ + idx_t nparts; /*!< The number of partitions */ + idx_t foldf; /*!< What is the folding factor */ + idx_t mtype; /*!< The matching type */ + idx_t ipart; /*!< The initial partitioning type */ + idx_t rtype; /*!< The refinement type */ + idx_t p_nseps; /*!< The number of separators to compute at each parallel bisection */ - int s_nseps; /* The number of separators to compute at each + idx_t s_nseps; /* The number of separators to compute at each serial bisection */ - float ubfrac; /* The max/avg fraction for separator bisections */ - int seed; /* Random number seed */ - int sync; /* Random number seed */ - float *tpwgts; /* Target subdomain weights */ - int tvwgts[MAXNCON]; - float ubvec[MAXNCON]; - int partType; - int ps_relation; - - float redist_factor, redist_base, ipc_factor; - float edge_size_ratio; - MatrixType *matrix; - - MPI_Comm gcomm; - MPI_Comm comm; /* MPI Communicator */ - MPI_Request sreq[MAX_PES], - rreq[MAX_PES]; /* MPI send and receive requests */ - MPI_Status statuses[MAX_PES]; + real_t ubfrac; /* The max/avg fraction for separator bisections */ + idx_t seed; /* Random number seed */ + idx_t sync; /* Random number seed */ + real_t *tpwgts; /* Target subdomain weights */ + real_t *invtvwgts; /* Per-constraint 1/total vertex weight */ + real_t *ubvec; /* Per-constraint unbalance factor */ + + idx_t dropedges; 
+ idx_t twohop; + idx_t fast; + + + idx_t partType; + idx_t ps_relation; + + real_t redist_factor; + real_t redist_base; + real_t ipc_factor; + real_t edge_size_ratio; + matrix_t *matrix; + + idx_t free_comm; /*!< Used to indicate if gcomm needs to be freed */ + MPI_Comm gcomm; /*!< A copy of the application supplied communicator */ + MPI_Comm comm; /*!< The current communicator */ + idx_t ncommpes; /*!< The maximum number of processors that a processor + may need to communicate with. This determines the + size of the sreq/rreq/statuses arrays and is + updated after every call to CommSetup() */ + MPI_Request *sreq; /*!< MPI send requests */ + MPI_Request *rreq; /*!< MPI receive requests */ + MPI_Status *statuses; /*!< MPI status for p2p i-messages */ MPI_Status status; + /* workspace variables */ + gk_mcore_t *mcore; /* GKlib's mcore */ + + /* These are for use by the k-way refinement routines */ + size_t nbrpoolsize; /*!< The number of cnbr_t entries that have been allocated */ + size_t nbrpoolcpos; /*!< The position of the first free entry in the array */ + size_t nbrpoolreallocs; /*!< The number of times the pool was resized */ + + cnbr_t *cnbrpool; /*!< The pool of cnbr_t entries to be used during refinement. + The size and current position of the pool is controlled + by nnbrs & cnbrs */ + + /* ondisk-related info */ + idx_t ondisk; + pid_t pid; /*!< The pid of the running process */ + /* Various Timers */ timer TotalTmr, InitPartTmr, MatchTmr, ContractTmr, CoarsenTmr, RefTmr, - SetupTmr, ColorTmr, ProjectTmr, KWayInitTmr, KWayTmr, MoveTmr, - RemapTmr, SerialTmr, AuxTmr1, AuxTmr2, AuxTmr3, AuxTmr4, AuxTmr5, - AuxTmr6; -}; - -typedef struct controldef CtrlType; + SetupTmr, ProjectTmr, KWayInitTmr, KWayTmr, MoveTmr, RemapTmr, + SerialTmr, AuxTmr1, AuxTmr2, AuxTmr3, AuxTmr4, AuxTmr5, AuxTmr6; +} ctrl_t; /************************************************************************* * The following data structure stores a mesh. 
**************************************************************************/ -struct meshdef { - int etype; - int gnelms, gnns; - int nelms, nns; - int ncon; - int esize, gminnode; - idxtype *elmdist; - idxtype *elements; - idxtype *elmwgt; -}; - -typedef struct meshdef MeshType; +typedef struct mesh_t { + idx_t etype; + idx_t gnelms, gnns; + idx_t nelms, nns; + idx_t ncon; + idx_t esize, gminnode; + idx_t *elmdist; + idx_t *elements; + idx_t *elmwgt; +} mesh_t; diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/temp/metis_defs.h b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/temp/metis_defs.h index 6fb079a6..ec0d50d5 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/temp/metis_defs.h +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/temp/metis_defs.h @@ -8,7 +8,7 @@ * Started 8/27/94 * George * - * $Id: defs.h,v 1.4 2003/07/22 20:29:05 karypis Exp $ + * $Id: defs.h 10543 2011-07-11 19:32:24Z karypis $ * */ @@ -31,8 +31,6 @@ #define REDIST_WGT 2.0 #define MAXNVWGT_FACTOR 2.0 -#define MAXNCON 12 -#define MAXNOBJ 12 #define N_MOC_REDO_PASSES 10 #define N_MOC_GR_PASSES 8 #define NREMAP_PASSES 8 @@ -47,10 +45,6 @@ #define NGD_PASSES 20 -#define OPTION_IPART 1 -#define OPTION_FOLDF 2 -#define OPTION_DBGLVL 3 - #define PMV3_OPTION_DBGLVL 1 #define PMV3_OPTION_SEED 2 #define PMV3_OPTION_IPART 3 @@ -75,10 +69,7 @@ #define NLGR_PASSES 5 /* Number of GR refinement during IPartition */ #define SMALLFLOAT 0.000001 -#define KEEP_BIT ((idxtype)(1<<((sizeof(idxtype)*8)-2))) -#define MAX_PES 8192 -#define MAX_NPARTS 67108864 #define COARSEN_FRACTION 0.75 /* Node reduction between succesive coarsening levels */ #define COARSEN_FRACTION2 0.55 /* Node reduction between succesive coarsening levels */ diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/temp/parmetislib.h 
b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/temp/parmetislib.h index 3008deab..298c28a8 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/temp/parmetislib.h +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/temp/parmetislib.h @@ -8,7 +8,7 @@ * Started 8/27/94 * George * - * $Id: parmetislib.h,v 1.2 2003/07/21 17:50:22 karypis Exp $ + * $Id: parmetislib.h 10379 2011-06-22 18:51:24Z benjamin $ */ /* @@ -16,19 +16,17 @@ #define DMALLOC 1 */ -#include - -// updated to place in current directory +#include +#include #include -// #include "../parmetis_svfsi_parmetis.h" -#ifdef DMALLOC -#include -#endif -#include -#include -#include -#include -#include +#include "gklib_parmetis_svfsi_defs.h" +#include "parmetis_svfsi_rename.h" +#include "parmetis_svfsi_defs.h" +#include "parmetis_svfsi_struct.h" +#include "parmetis_svfsi_macros.h" +#include "parmetis_svfsi_proto.h" + +#define MAXNCON 32 diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/timer.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/timer.c index 428fc1d2..9fd01de2 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/timer.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/timer.c @@ -8,7 +8,7 @@ * Started 10/19/96 * George * - * $Id: timer.c,v 1.2 2003/07/21 17:18:54 karypis Exp $ + * $Id: timer.c 10052 2011-06-01 22:29:57Z karypis $ * */ @@ -20,7 +20,7 @@ /************************************************************************* * This function initializes the various timers **************************************************************************/ -void InitTimers(CtrlType *ctrl) +void InitTimers(ctrl_t *ctrl) { cleartimer(ctrl->TotalTmr); cleartimer(ctrl->InitPartTmr); @@ -48,7 +48,7 @@ void InitTimers(CtrlType *ctrl) /************************************************************************* * This function 
prints timing information about KMETIS **************************************************************************/ -void PrintTimingInfo(CtrlType *ctrl) +void PrintTimingInfo(ctrl_t *ctrl) { /* PrintTimer(ctrl, ctrl->CoarsenTmr, " Coarsening"); */ PrintTimer(ctrl, ctrl->SetupTmr, " Setup"); @@ -75,18 +75,18 @@ void PrintTimingInfo(CtrlType *ctrl) /************************************************************************* * This function prints timer stat **************************************************************************/ -void PrintTimer(CtrlType *ctrl, timer tmr, char *msg) +void PrintTimer(ctrl_t *ctrl, timer tmr, char *msg) { double sum, max, tsec; tsec = gettimer(tmr); - MPI_Reduce((void *)&tsec, (void *)&sum, 1, MPI_DOUBLE, MPI_SUM, 0, ctrl->comm); + gkMPI_Reduce((void *)&tsec, (void *)&sum, 1, MPI_DOUBLE, MPI_SUM, 0, ctrl->comm); tsec = gettimer(tmr); - MPI_Reduce((void *)&tsec, (void *)&max, 1, MPI_DOUBLE, MPI_MAX, 0, ctrl->comm); + gkMPI_Reduce((void *)&tsec, (void *)&max, 1, MPI_DOUBLE, MPI_MAX, 0, ctrl->comm); if (ctrl->mype == 0 && sum != 0.0) - printf("%s: Max: %7.3f, Sum: %7.3f, Balance: %7.3f\n", - msg, (float)max, (float)sum, (float)(max*ctrl->npes/sum)); + printf("%s: Max: %7.3"PRREAL", Sum: %7.3"PRREAL", Balance: %7.3"PRREAL"\n", + msg, (real_t)max, (real_t)sum, (real_t)(max*ctrl->npes/sum)); } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/util.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/util.c index f0dd9c29..01a82ba6 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/util.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/util.c @@ -8,7 +8,7 @@ * Started 9/28/95 * George * - * $Id: util.c,v 1.2 2003/07/21 17:18:54 karypis Exp $ + * $Id: util.c 10057 2011-06-02 13:44:44Z karypis $ */ #include @@ -17,32 +17,11 @@ /************************************************************************* * This function prints an error 
message and exits **************************************************************************/ -void errexit(char *f_str,...) +void myprintf(ctrl_t *ctrl, char *f_str,...) { va_list argp; - fprintf(stderr, "[ParMETIS Fatal Error] "); - - va_start(argp, f_str); - vfprintf(stderr, f_str, argp); - va_end(argp); - - if (strlen(f_str) == 0 || f_str[strlen(f_str)-1] != '\n') - fprintf(stderr,"\n"); - fflush(stderr); - - abort(); -} - - -/************************************************************************* -* This function prints an error message and exits -**************************************************************************/ -void myprintf(CtrlType *ctrl, char *f_str,...) -{ - va_list argp; - - fprintf(stdout, "[%2d] ", ctrl->mype); + fprintf(stdout, "[%2"PRIDX"] ", ctrl->mype); va_start(argp, f_str); vfprintf(stdout, f_str, argp); @@ -55,11 +34,10 @@ void myprintf(CtrlType *ctrl, char *f_str,...) } - /************************************************************************* * This function prints an error message and exits **************************************************************************/ -void rprintf(CtrlType *ctrl, char *f_str,...) +void rprintf(ctrl_t *ctrl, char *f_str,...) { va_list argp; @@ -71,316 +49,18 @@ void rprintf(CtrlType *ctrl, char *f_str,...) 
fflush(stdout); - MPI_Barrier(ctrl->comm); - -} - - -#ifndef DMALLOC -/************************************************************************* -* The following function allocates an array of integers -**************************************************************************/ -int *imalloc(int n, char *msg) -{ - if (n == 0) - return NULL; - - return (int *)GKmalloc(sizeof(int)*n, msg); -} - - -/************************************************************************* -* The following function allocates an array of integers -**************************************************************************/ -idxtype *idxmalloc(int n, char *msg) -{ - if (n == 0) - return NULL; - - return (idxtype *)GKmalloc(sizeof(idxtype)*n, msg); -} - - -/************************************************************************* -* The following function allocates an array of float -**************************************************************************/ -float *fmalloc(int n, char *msg) -{ - if (n == 0) - return NULL; - - return (float *)GKmalloc(sizeof(float)*n, msg); -} - - -/************************************************************************* -* The follwoing function allocates an array of integers -**************************************************************************/ -int *ismalloc(int n, int ival, char *msg) -{ - if (n == 0) - return NULL; - - return iset(n, ival, (int *)GKmalloc(sizeof(int)*n, msg)); -} - - - -/************************************************************************* -* The follwoing function allocates an array of integers -**************************************************************************/ -idxtype *idxsmalloc(int n, idxtype ival, char *msg) -{ - if (n == 0) - return NULL; - - return idxset(n, ival, (idxtype *)GKmalloc(sizeof(idxtype)*n, msg)); -} - - -/************************************************************************* -* This function is my wrapper around malloc 
-**************************************************************************/ -void *GKmalloc(int nbytes, char *msg) -{ - void *ptr; - - if (nbytes == 0) - return NULL; - - ptr = (void *)malloc(nbytes); - if (ptr == NULL) - errexit("***Memory allocation failed for %s. Requested size: %d bytes", msg, nbytes); - - return ptr; -} -#endif - -/************************************************************************* -* This function is my wrapper around free, allows multiple pointers -**************************************************************************/ -void GKfree(void **ptr1,...) -{ - va_list plist; - void **ptr; - - if (*ptr1 != NULL) - free(*ptr1); - *ptr1 = NULL; - - va_start(plist, ptr1); - - while ((ptr = va_arg(plist, void **)) != LTERM) { - if (*ptr != NULL) - free(*ptr); - *ptr = NULL; - } - - va_end(plist); -} - - -/************************************************************************* -* These functions set the values of a vector -**************************************************************************/ -int *iset(int n, int val, int *x) -{ - int i; - - for (i=0; icomm); - return x; } - -/************************************************************************* -* These functions return the index of the maximum element in a vector -**************************************************************************/ -int idxamax(int n, idxtype *x) -{ - int i, max=0; - - for (i=1; i x[max] ? i : max); - - return max; -} - - -/************************************************************************* -* These functions return the index of the minimum element in a vector -**************************************************************************/ -int idxamin(int n, idxtype *x) -{ - int i, min=0; - - for (i=1; ikey != n2->key ? 
n1->key - n2->key : n1->val - n2->val); -} - - - -/************************************************************************* -* This function sorts an array of type KeyValueType in increasing order -**************************************************************************/ -void dkeyvalsort(int n, KeyValueType *nodes) -{ - qsort((void *)nodes, (size_t)n, (size_t)sizeof(KeyValueType), DecKeyValueCmp); -} - - -/************************************************************************* -* This function compares 2 KeyValueType variables for sorting in inc order -**************************************************************************/ -int DecKeyValueCmp(const void *v1, const void *v2) -{ - KeyValueType *n1, *n2; - - n1 = (KeyValueType *)v1; - n2 = (KeyValueType *)v2; - - return n2->key - n1->key; - -} - - - /************************************************************************* * This function does a binary search on an array for a key and returns * the index **************************************************************************/ -int BSearch(int n, idxtype *array, int key) +idx_t BSearch(idx_t n, idx_t *array, idx_t key) { - int a=0, b=n, c; + idx_t a=0, b=n, c; while (b-a > 8) { c = (a+b)>>1; @@ -395,22 +75,21 @@ int BSearch(int n, idxtype *array, int key) return c; } - errexit("Key %d not found!\n", key); + errexit("Key %"PRIDX" not found!\n", key); return 0; } - /************************************************************************* * This file randomly permutes the contents of an array. * flag == 0, don't initialize perm * flag == 1, set p[i] = i **************************************************************************/ -void RandomPermute(int n, idxtype *p, int flag) +void RandomPermute(idx_t n, idx_t *p, idx_t flag) { - int i, u, v; - idxtype tmp; + idx_t i, u, v; + idx_t tmp; if (flag == 1) { for (i=0; i>1); return (a > 1 ? 
0 : 1); @@ -468,51 +147,21 @@ int ispow2(int a) /************************************************************************* * This function returns the log2(x) **************************************************************************/ -int log2Int(int a) +idx_t log2Int(idx_t a) { - int i; + idx_t i; for (i=1; a > 1; i++, a = a>>1); return i-1; } -/************************************************************************* -* These functions set the values of a vector -**************************************************************************/ -float *sset(int n, float val, float *x) -{ - int i; - - for (i=0; i x[max] ? i : max); - - return max; -} - - /************************************************************************* * These functions return the index of the maximum element in a vector **************************************************************************/ -int samax_strd(int n, float *x, int incx) +size_t rargmax_strd(size_t n, real_t *x, size_t incx) { - int i; - int max=0; + size_t i, max=0; n *= incx; for (i=incx; i fabs(x[max]) ? i : max); - - return max; -} - - - -/************************************************************************* -* These functions return the index of the maximum element in a vector -**************************************************************************/ -int samin_strd(int n, float *x, int incx) +size_t rargmin_strd(size_t n, real_t *x, size_t incx) { - int i; - int min=0; + size_t i, min=0; n *= incx; for (i=incx; i x[max] ? i : max); - - return max/incx; -} - - -/************************************************************************* -* These functions return the index of the maximum element in a vector -**************************************************************************/ -int idxamin_strd(int n, idxtype *x, int incx) -{ - int i, min=0; - - n *= incx; - for (i=incx; i x[max] ? 
i : max); - - return max; -} - - -/************************************************************************* -* These functions return the index of the maximum element in a vector -**************************************************************************/ -int sfavg(int n, float *x) -{ - int i; - float total = 0.0; - - if (n == 0) - return 0.0; - - for (i=0; i x[1]) { max1 = 0; @@ -676,105 +216,46 @@ int samax2(int n, float *x) /************************************************************************* -* These functions return the index of the minimum element in a vector -**************************************************************************/ -int samin(int n, float *x) -{ - int i, min=0; - - for (i=1; i=0; n--) - y[n] += x[n]; -} - - -/************************************************************************* -* This function sums the entries in an array +* This function returns the average value of an array **************************************************************************/ -float ssum(int n, float *x) +real_t ravg(size_t n, real_t *x) { - int i; - float sum = 0.0; + size_t i; + real_t retval = 0.0; for (i=0; ival > second->val) - return 1; - - if (first->val < second->val) - return -1; - - return 0; -} - -/************************************************************************* -* This is the inverse comparison function -**************************************************************************/ -int imyvalkeycompare(const void *fptr, const void *sptr) -{ - KVType *first, *second; - - first = (KVType *)(fptr); - second = (KVType *)(sptr); - - if (first->val > second->val) - return -1; - - if (first->val < second->val) - return 1; - - return 0; -} - - -/************************************************************************* -* The following function allocates and sets an array of floats -**************************************************************************/ -float *fsmalloc(int n, float fval, char *msg) -{ - if (n == 0) - return NULL; - - 
return sset(n, fval, (float *)GKmalloc(sizeof(float)*n, msg)); -} - - -/************************************************************************* -* This function computes a 2-norm -**************************************************************************/ -void saxpy2(int n, float alpha, float *x, int incx, float *y, int incy) -{ - int i; - - for (i=0; invtxs; nedges = graph->nedges; @@ -47,47 +47,59 @@ float WavefrontDiffusion(CtrlType *ctrl, GraphType *graph, idxtype *home) flowFactor = (ctrl->mype == 4) ? 1.00 : flowFactor; /* allocate memory */ - solution = fmalloc(4*nparts+2*nedges, "WavefrontDiffusion: solution"); - tmpvec = solution + nparts; - npwgts = solution + 2*nparts; - load = solution + 3*nparts; - matrix.values = solution + 4*nparts; - transfer = matrix.transfer = solution + 4*nparts + nedges; + solution = rmalloc(6*nparts+2*nedges, "WavefrontDiffusion: solution"); + tmpvec = solution + nparts; /* nparts */ + npwgts = solution + 2*nparts; /* nparts */ + load = solution + 3*nparts; /* nparts */ + matrix.values = solution + 4*nparts; /* nparts+nedges */ + transfer = matrix.transfer = solution + 5*nparts + nedges /* nparts+nedges */; - perm = idxmalloc(2*nvtxs+2*nparts+nedges+1, "WavefrontDiffusion: perm"); - ed = perm + nvtxs; - psize = perm + 2*nvtxs; - rowptr = matrix.rowptr = perm + 2*nvtxs + nparts; - colind = matrix.colind = perm + 2*nvtxs + 2*nparts + 1; + perm = imalloc(2*nvtxs+3*nparts+nedges+1, "WavefrontDiffusion: perm"); + ed = perm + nvtxs; /* nvtxs */ + psize = perm + 2*nvtxs; /* nparts */ + rowptr = matrix.rowptr = perm + 2*nvtxs + nparts; /* nparts+1 */ + colind = matrix.colind = perm + 2*nvtxs + 2*nparts + 1; /* nparts+nedges */ - wsize = amax(sizeof(float)*nparts*6, sizeof(idxtype)*(nvtxs+nparts*2+1)); - workspace = (float *)GKmalloc(wsize, "WavefrontDiffusion: workspace"); - cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "WavefrontDiffusion: cand"); + /*GKTODO - Potential problem with this malloc */ + wsize = 
gk_max(sizeof(real_t)*6*nparts, sizeof(idx_t)*(nvtxs+2*nparts+1)); + workspace = (real_t *)gk_malloc(wsize, "WavefrontDiffusion: workspace"); + cand = ikvmalloc(nvtxs, "WavefrontDiffusion: cand"); - /*****************************/ /* Populate empty subdomains */ - /*****************************/ - idxset(nparts, 0, psize); + iset(nparts, 0, psize); for (i=0; i 0) + continue; + + maxd = iargmax(nparts, psize, 1); + if (psize[maxd] == 1) + break; /* we cannot do anything if the heaviest subdomain contains one vertex! */ + for (i=0; itpwgts, 0); - done = 0; /* zero out the tmpvec array */ - sset(nparts, 0.0, tmpvec); + rset(nparts, 0.0, tmpvec); - npasses = amin(nparts/2, NGD_PASSES); - for (l=0; lmype); - goto CleanUpAndExit; + cost = (real_t)(ctrl->mype); + break; } } - ConjGrad2(&matrix, load, solution, 0.001, workspace); - ComputeTransferVector(1, &matrix, solution, transfer, 0); - - GetThreeMax(nparts, load, &first, &second, &third); - - if (l%3 == 0) { - FastRandomPermute(nvtxs, perm, 1); - } - else { - /*****************************/ - /* move dirty vertices first */ - /*****************************/ - ndirty = 0; - for (i=0; imype == 0) { - for (j=nvtxs, k=0, ii=0; iimype == 0) ? cand[ii].val : perm[ii]; - from = where[i]; - - /* don't move out the last vertex in a subdomain */ - if (psize[from] == 1) - continue; - - clean = (from == home[i]) ? 1 : 0; - - /* only move from top three or dirty vertices */ - if (from != first && from != second && from != third && clean) - continue; - - /* Scatter the sparse transfer row into the dense tmpvec row */ - for (j=rowptr[from]+1; j (flowFactor * nvwgt[i])) { - tmpvec[to] -= nvwgt[i]; - INC_DEC(psize[to], psize[from], 1); - INC_DEC(npwgts[to], npwgts[from], nvwgt[i]); - INC_DEC(load[to], load[from], nvwgt[i]); - where[i] = to; - nswaps++; - - /* Update external degrees */ - ed[i] = 0; - for (k=xadj[i]; kmype == 0) { + for (j=nvtxs, k=0, ii=0; iimype == 0) ? 
cand[ii].val : perm[ii]; + from = where[i]; + + /* don't move out the last vertex in a subdomain */ + if (psize[from] == 1) + continue; + + clean = (from == home[i]) ? 1 : 0; + + /* only move from top three or dirty vertices */ + if (from != first && from != second && from != third && clean) + continue; + + /* Scatter the sparse transfer row into the dense tmpvec row */ + for (j=rowptr[from]+1; j (flowFactor * nvwgt[i])) { + tmpvec[to] -= nvwgt[i]; + INC_DEC(psize[to], psize[from], 1); + INC_DEC(npwgts[to], npwgts[from], nvwgt[i]); + INC_DEC(load[to], load[from], nvwgt[i]); + where[i] = to; + nswaps++; + + /* Update external degrees */ + ed[i] = 0; + for (k=xadj[i]; k 0) break; - noswaps = (nswaps > 0) ? 0 : 1; + noswaps = (nswaps > 0 ? 0 : 1); if (GlobalSESum(ctrl, noswaps) > ctrl->npes/2) break; - } } graph->mincut = ComputeSerialEdgeCut(graph); totalv = Mc_ComputeSerialTotalV(graph, home); - cost = ctrl->ipc_factor * (float)graph->mincut + ctrl->redist_factor * (float)totalv; + cost = ctrl->ipc_factor * (real_t)graph->mincut + ctrl->redist_factor * (real_t)totalv; CleanUpAndExit: - GKfree((void **)&solution, (void **)&perm, (void **)&workspace, (void **)&cand, LTERM); + gk_free((void **)&solution, (void **)&perm, (void **)&workspace, (void **)&cand, LTERM); return cost; } diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/weird.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/weird.c index 7581f227..be23b55a 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/weird.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/weird.c @@ -8,268 +8,512 @@ * Started 10/19/96 * George * - * $Id: weird.c,v 1.9 2003/07/31 16:27:28 karypis Exp $ + * $Id: weird.c 10592 2011-07-16 21:17:53Z karypis $ * */ #include +#define RIFN(x) \ + if ((x) == NULL) {\ + printf("PARMETIS ERROR " #x " is NULL.\n");\ + return 0;\ + } +#define RIFNP(x) \ + if ((*x) <= 0) {\ + 
printf("PARMETIS ERROR " #x " is <= 0.\n");\ + return 0;\ + } -/************************************************************************* -* This function computes a partitioning of a small graph -**************************************************************************/ -void PartitionSmallGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) -{ - int i, h, ncon, nparts, npes, mype; - int moptions[10]; - int mynumflag, mywgtflag, me; - idxtype *mypart; - int lpecut[2], gpecut[2]; - GraphType *agraph; - int *sendcounts, *displs; - float *mytpwgts, *gnpwgts, *lnpwgts; - - ncon = graph->ncon; - nparts = ctrl->nparts; - - MPI_Comm_size(ctrl->comm, &npes); - MPI_Comm_rank(ctrl->comm, &mype); - SetUp(ctrl, graph, wspace); - graph->where = idxmalloc(graph->nvtxs+graph->nrecv, "PartitionSmallGraph: where"); - agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace); - mypart = idxmalloc(agraph->nvtxs, "mypart"); +/*************************************************************************/ +/*! This function checks the validity of the inputs for PartKway + */ +/*************************************************************************/ +int CheckInputsPartKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, + MPI_Comm *comm) +{ + idx_t i, j, mype; + real_t sum; - moptions[0] = 0; - moptions[7] = ctrl->sync + mype; - mynumflag = 0; - mywgtflag = 3; - if (ncon == 1) { - METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, - agraph->adjwgt, &mywgtflag, &mynumflag, &nparts, ctrl->tpwgts, moptions, - &graph->mincut, mypart); + /* Check that the supplied information is actually non-NULL */ + if (comm == NULL) { + printf("PARMETIS ERROR: comm is NULL. 
Aborting\n"); + abort(); + } + gkMPI_Comm_rank(*comm, &mype); + + RIFN(vtxdist); + RIFN(xadj); + RIFN(adjncy); + RIFN(wgtflag); + RIFN(numflag); + RIFN(ncon); + RIFN(nparts); + RIFN(tpwgts); + RIFN(ubvec); + RIFN(options); + RIFN(edgecut); + RIFN(part); + + if (*wgtflag == 2 || *wgtflag == 3) { + RIFN(vwgt); + for (j=0; j<*ncon; j++) { + if (GlobalSESumComm(*comm, isum(vtxdist[mype+1]-vtxdist[mype], vwgt+j, *ncon)) == 0) { + printf("PARMETIS ERROR: sum weight for constraint %"PRIDX" is zero.\n", j); + return 0; + } + } } - else { - mytpwgts = fmalloc(nparts, "mytpwgts"); - for (i=0; itpwgts[i*ncon]; + if (*wgtflag == 1 || *wgtflag == 3) + RIFN(adjwgt); - METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, - agraph->vwgt, agraph->adjwgt, &mywgtflag, &mynumflag, &nparts, mytpwgts, - moptions, &graph->mincut, mypart); - GKfree((void **)&mytpwgts, LTERM); + /* Check that the supplied information is actually valid/reasonable */ + if (vtxdist[mype+1]-vtxdist[mype] < 1) { + printf("PARMETIS ERROR: Poor initial vertex distribution. 
" + "Processor %"PRIDX" has no vertices assigned to it!\n", mype); + return 0; } - lpecut[0] = graph->mincut; - lpecut[1] = mype; - MPI_Allreduce(lpecut, gpecut, 1, MPI_2INT, MPI_MINLOC, ctrl->comm); - graph->mincut = gpecut[0]; + RIFNP(ncon); + RIFNP(nparts); - if (lpecut[1] == gpecut[1] && gpecut[1] != 0) - MPI_Send((void *)mypart, agraph->nvtxs, IDX_DATATYPE, 0, 1, ctrl->comm); - if (lpecut[1] == 0 && gpecut[1] != 0) - MPI_Recv((void *)mypart, agraph->nvtxs, IDX_DATATYPE, gpecut[1], 1, ctrl->comm, &ctrl->status); - - sendcounts = imalloc(npes, "sendcounts"); - displs = imalloc(npes, "displs"); - for (i=0; ivtxdist[i+1]-graph->vtxdist[i]; - displs[i] = graph->vtxdist[i]; + for (j=0; j<*ncon; j++) { + sum = rsum(*nparts, tpwgts+j, *ncon); + if (sum < 0.999 || sum > 1.001) { + printf("PARMETIS ERROR: The sum of tpwgts for constraint #%"PRIDX" is not 1.0\n", j); + return 0; + } + } + for (j=0; j<*ncon; j++) { + for (i=0; i<*nparts; i++) { + if (tpwgts[i*(*ncon)+j] < 0.0 || tpwgts[i] > 1.001) { + printf("PARMETIS ERROR: The tpwgts for constraint #%"PRIDX" and partition #%"PRIDX" is out of bounds.\n", j, i); + return 0; + } + } } - MPI_Scatterv((void *)mypart, sendcounts, displs, IDX_DATATYPE, - (void *)graph->where, graph->nvtxs, IDX_DATATYPE, 0, ctrl->comm); - lnpwgts = graph->lnpwgts = fmalloc(nparts*ncon, "lnpwgts"); - gnpwgts = graph->gnpwgts = fmalloc(nparts*ncon, "gnpwgts"); - sset(nparts*ncon, 0, lnpwgts); - for (i=0; invtxs; i++) { - me = graph->where[i]; - for (h=0; hnvwgt[i*ncon+h]; + for (j=0; j<*ncon; j++) { + if (ubvec[j] <= 1.0) { + printf("PARMETIS ERROR: The ubvec for constraint #%"PRIDX" must be > 1.0\n", j); + return 0; + } } - MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, MPI_FLOAT, MPI_SUM, ctrl->comm); - GKfree((void**)&mypart, (void**)&sendcounts, (void**)&displs, LTERM); - FreeGraph(agraph); - return; + return 1; } - -/************************************************************************* -* This function checks the inputs for 
the partitioning routines -**************************************************************************/ -void CheckInputs(int partType, int npes, int dbglvl, int *wgtflag, int *iwgtflag, - int *numflag, int *inumflag, int *ncon, int *incon, int *nparts, - int *inparts, float *tpwgts, float **itpwgts, float *ubvec, - float *iubvec, float *ipc2redist, float *iipc2redist, int *options, - int *ioptions, idxtype *part, MPI_Comm *comm) +/*************************************************************************/ +/*! This function checks the validity of the inputs for PartGeomKway + */ +/*************************************************************************/ +int CheckInputsPartGeomKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ndims, real_t *xyz, + idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part, MPI_Comm *comm) { - int i, j; - int doweabort, doiabort = 0; - float tsum, *myitpwgts; - int mgcnums[5] = {-1, 2, 3, 4, 2}; + idx_t i, j, mype; + real_t sum; - /**************************************/ - if (part == NULL) { - doiabort = 1; - IFSET(dbglvl, DBG_INFO, printf("ERROR: part array is set to NULL.\n")); + /* Check that the supplied information is actually non-NULL */ + if (comm == NULL) { + printf("PARMETIS ERROR: comm is NULL. 
Aborting\n"); + abort(); + } + gkMPI_Comm_rank(*comm, &mype); + + RIFN(vtxdist); + RIFN(xadj); + RIFN(adjncy); + RIFN(xyz); + RIFN(ndims); + RIFN(wgtflag); + RIFN(numflag); + RIFN(ncon); + RIFN(nparts); + RIFN(tpwgts); + RIFN(ubvec); + RIFN(options); + RIFN(edgecut); + RIFN(part); + + if (*wgtflag == 2 || *wgtflag == 3) { + RIFN(vwgt); + for (j=0; j<*ncon; j++) { + if (GlobalSESumComm(*comm, isum(vtxdist[mype+1]-vtxdist[mype], vwgt+j, *ncon)) == 0) { + printf("PARMETIS ERROR: sum weight for constraint %"PRIDX" is zero.\n", j); + return 0; + } + } } - /**************************************/ + if (*wgtflag == 1 || *wgtflag == 3) + RIFN(adjwgt); - /**************************************/ - if (wgtflag == NULL) { - *iwgtflag = 0; - IFSET(dbglvl, DBG_INFO, printf("WARNING: wgtflag is NULL. Using a value of 0.\n")); + /* Check that the supplied information is actually valid/reasonable */ + if (vtxdist[mype+1]-vtxdist[mype] < 1) { + printf("PARMETIS ERROR: Poor initial vertex distribution. " + "Processor %"PRIDX" has no vertices assigned to it!\n", mype); + return 0; } - else { - *iwgtflag = *wgtflag; - } - /**************************************/ + RIFNP(ncon); + RIFNP(nparts); + RIFNP(ndims); - /**************************************/ - if (numflag == NULL) { - *inumflag = 0; - IFSET(dbglvl, DBG_INFO, printf("WARNING: numflag is NULL. Using a value of 0.\n")); + if (*ndims > 3) { + printf("PARMETIS ERROR: The ndims should be <= 3.\n"); + return 0; } - else { - if (*numflag != 0 && *numflag != 1) { - IFSET(dbglvl, DBG_INFO, printf("WARNING: bad value for numflag %d. 
Using a value of 0.\n", *numflag)); - *inumflag = 0; + + for (j=0; j<*ncon; j++) { + sum = rsum(*nparts, tpwgts+j, *ncon); + if (sum < 0.999 || sum > 1.001) { + printf("PARMETIS ERROR: The sum of tpwgts for constraint #%"PRIDX" is not 1.0\n", j); + return 0; } - else { - *inumflag = *numflag; + } + for (j=0; j<*ncon; j++) { + for (i=0; i<*nparts; i++) { + if (tpwgts[i*(*ncon)+j] < 0.0 || tpwgts[i] > 1.001) { + printf("PARMETIS ERROR: The tpwgts for constraint #%"PRIDX" and partition #%"PRIDX" is out of bounds.\n", j, i); + return 0; + } } } - /**************************************/ - /**************************************/ - if (ncon == NULL) { - *incon = 1; - IFSET(dbglvl, DBG_INFO, printf("WARNING: ncon is NULL. Using a value of 1.\n")); - } - else { - if (*ncon < 1 || *ncon > MAXNCON) { - IFSET(dbglvl, DBG_INFO, printf("WARNING: bad value for ncon %d. Using a value of 1.\n", *ncon)); - *incon = 1; - } - else { - *incon = *ncon; + for (j=0; j<*ncon; j++) { + if (ubvec[j] <= 1.0) { + printf("PARMETIS ERROR: The ubvec for constraint #%"PRIDX" must be > 1.0\n", j); + return 0; } } - /**************************************/ + return 1; +} - /**************************************/ - if (nparts == NULL) { - *inparts = npes; - IFSET(dbglvl, DBG_INFO, printf("WARNING: nparts is NULL. Using a value of %d.\n", npes)); + +/*************************************************************************/ +/*! This function checks the validity of the inputs for PartGeom + */ +/*************************************************************************/ +int CheckInputsPartGeom(idx_t *vtxdist, idx_t *ndims, real_t *xyz, + idx_t *part, MPI_Comm *comm) +{ + idx_t mype; + + /* Check that the supplied information is actually non-NULL */ + if (comm == NULL) { + printf("PARMETIS ERROR: comm is NULL. Aborting\n"); + abort(); } - else { - if (*nparts < 1 || *nparts > MAX_NPARTS) { - IFSET(dbglvl, DBG_INFO, printf("WARNING: bad value for nparts %d. 
Using a value of %d.\n", *nparts, npes)); - *inparts = npes; - } - else { - *inparts = *nparts; - } + + RIFN(vtxdist); + RIFN(xyz); + RIFN(ndims); + RIFN(part); + + /* Check that the supplied information is actually valid/reasonable */ + gkMPI_Comm_rank(*comm, &mype); + if (vtxdist[mype+1]-vtxdist[mype] < 1) { + printf("PARMETIS ERROR: Poor initial vertex distribution. " + "Processor %"PRIDX" has no vertices assigned to it!\n", mype); + return 0; } - /**************************************/ - - - /**************************************/ - myitpwgts = *itpwgts = fmalloc((*inparts)*(*incon), "CheckInputs: itpwgts"); - if (tpwgts == NULL) { - sset((*inparts)*(*incon), 1.0/(float)(*inparts), myitpwgts); - IFSET(dbglvl, DBG_INFO, printf("WARNING: tpwgts is NULL. Setting all array elements to %.3f.\n", 1.0/(float)(*inparts))); - } - else { - for (i=0; i<*incon; i++) { - tsum = 0.0; - for (j=0; j<*inparts; j++) { - tsum += tpwgts[j*(*incon)+i]; - } - if (fabs(1.0-tsum) < SMALLFLOAT) - tsum = 1.0; - for (j=0; j<*inparts; j++) - myitpwgts[j*(*incon)+i] = tpwgts[j*(*incon)+i] / tsum; - } + + RIFNP(ndims); + + if (*ndims > 3) { + printf("PARMETIS ERROR: The ndims should be <= 3.\n"); + return 0; } - /**************************************/ + return 1; +} + + +/*************************************************************************/ +/*! This function checks the validity of the inputs for AdaptiveRepart + */ +/*************************************************************************/ +int CheckInputsAdaptiveRepart(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *wgtflag, + idx_t *numflag, idx_t *ncon, idx_t *nparts, real_t *tpwgts, + real_t *ubvec, real_t *ipc2redist, idx_t *options, idx_t *edgecut, + idx_t *part, MPI_Comm *comm) +{ + idx_t i, j, mype; + real_t sum; - /**************************************/ - if (ubvec == NULL) { - sset(*incon, 1.05, iubvec); - IFSET(dbglvl, DBG_INFO, printf("WARNING: ubvec is NULL. 
Setting all array elements to 1.05.\n")); + /* Check that the supplied information is actually non-NULL */ + if (comm == NULL) { + printf("PARMETIS ERROR: comm is NULL. Aborting\n"); + abort(); } - else { - for (i=0; i<*incon; i++) { - if (ubvec[i] < 1.0 || ubvec[i] > (float)(*inparts)) { - iubvec[i] = 1.05; - IFSET(dbglvl, DBG_INFO, printf("WARNING: bad value for ubvec[%d]: %.3f. Setting value to 1.05.[%d]\n", i, ubvec[i], *inparts)); - } - else { - iubvec[i] = ubvec[i]; + gkMPI_Comm_rank(*comm, &mype); + + RIFN(vtxdist); + RIFN(xadj); + RIFN(adjncy); + /*RIFN(vsize);*/ + RIFN(wgtflag); + RIFN(numflag); + RIFN(ncon); + RIFN(nparts); + RIFN(tpwgts); + RIFN(ubvec); + RIFN(options); + RIFN(edgecut); + RIFN(part); + + if (*wgtflag == 2 || *wgtflag == 3) { + RIFN(vwgt); + for (j=0; j<*ncon; j++) { + if (GlobalSESumComm(*comm, isum(vtxdist[mype+1]-vtxdist[mype], vwgt+j, *ncon)) == 0) { + printf("PARMETIS ERROR: sum weight for constraint %"PRIDX" is zero.\n", j); + return 0; } } } - /**************************************/ + if (*wgtflag == 1 || *wgtflag == 3) + RIFN(adjwgt); - /**************************************/ - if (partType == ADAPTIVE_PARTITION) { - if (ipc2redist != NULL) { - if (*ipc2redist < SMALLFLOAT || *ipc2redist > 1000000.0) { - IFSET(dbglvl, DBG_INFO, printf("WARNING: bad value for ipc2redist %.3f. Using a value of 1000.\n", *ipc2redist)); - *iipc2redist = 1000.0; - } - else { - *iipc2redist = *ipc2redist; + /* Check that the supplied information is actually valid/reasonable */ + if (vtxdist[mype+1]-vtxdist[mype] < 1) { + printf("PARMETIS ERROR: Poor initial vertex distribution. 
" + "Processor %"PRIDX" has no vertices assigned to it!\n", mype); + return 0; + } + + RIFNP(ncon); + RIFNP(nparts); + + + for (j=0; j<*ncon; j++) { + sum = rsum(*nparts, tpwgts+j, *ncon); + if (sum < 0.999 || sum > 1.001) { + printf("PARMETIS ERROR: The sum of tpwgts for constraint #%"PRIDX" is not 1.0\n", j); + return 0; + } + } + for (j=0; j<*ncon; j++) { + for (i=0; i<*nparts; i++) { + if (tpwgts[i*(*ncon)+j] < 0.0 || tpwgts[i] > 1.001) { + printf("PARMETIS ERROR: The tpwgts for constraint #%"PRIDX" and partition #%"PRIDX" is out of bounds.\n", j, i); + return 0; } } - else { - IFSET(dbglvl, DBG_INFO, printf("WARNING: ipc2redist is NULL. Using a value of 1000.\n")); - *iipc2redist = 1000.0; + } + + + for (j=0; j<*ncon; j++) { + if (ubvec[j] <= 1.0) { + printf("PARMETIS ERROR: The ubvec for constraint #%"PRIDX" must be > 1.0\n", j); + return 0; } } - /**************************************/ + + if (*ipc2redist < 0.0001 || *ipc2redist > 1000000.0) { + printf("PARMETIS ERROR: The ipc2redist value should be between [0.0001, 1000000.0]\n"); + return 0; + } + + return 1; +} - /**************************************/ - if (options == NULL) { - ioptions[0] = 0; - IFSET(dbglvl, DBG_INFO, printf("WARNING: options is NULL. Using defaults\n")); +/*************************************************************************/ +/*! This function checks the validity of the inputs for NodeND + */ +/*************************************************************************/ +int CheckInputsNodeND(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, + idx_t *numflag, idx_t *options, idx_t *order, idx_t *sizes, + MPI_Comm *comm) +{ + idx_t mype; + + /* Check that the supplied information is actually non-NULL */ + if (comm == NULL) { + printf("PARMETIS ERROR: comm is NULL. 
Aborting\n"); + abort(); } - else { - ioptions[0] = options[0]; - ioptions[1] = options[1]; - ioptions[2] = options[2]; - if (partType == ADAPTIVE_PARTITION || partType == REFINE_PARTITION) - ioptions[3] = options[3]; + + RIFN(vtxdist); + RIFN(xadj); + RIFN(adjncy); + RIFN(numflag); + RIFN(options); + RIFN(order); + RIFN(sizes); + + /* Check that the supplied information is actually valid/reasonable */ + gkMPI_Comm_rank(*comm, &mype); + if (vtxdist[mype+1]-vtxdist[mype] < 1) { + printf("PARMETIS ERROR: Poor initial vertex distribution. " + "Processor %"PRIDX" has no vertices assigned to it!\n", mype); + return 0; } - /**************************************/ + + return 1; +} - /**************************************/ + +/*************************************************************************/ +/*! This function checks the validity of the inputs for PartMeshKway + */ +/*************************************************************************/ +int CheckInputsPartMeshKway(idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *elmwgt, + idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *ncommon, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, + MPI_Comm *comm) +{ + idx_t i, j, mype; + real_t sum; + + /* Check that the supplied information is actually non-NULL */ if (comm == NULL) { - IFSET(dbglvl, DBG_INFO, printf("ERROR: comm is NULL. Aborting\n")); + printf("PARMETIS ERROR: comm is NULL. 
Aborting\n"); abort(); } - else { - MPI_Allreduce((void *)&doiabort, (void *)&doweabort, 1, MPI_INT, MPI_MAX, *comm); - if (doweabort > 0) - abort(); + + RIFN(elmdist); + RIFN(eptr); + RIFN(eind); + RIFN(wgtflag); + RIFN(numflag); + RIFN(ncon); + RIFN(nparts); + RIFN(tpwgts); + RIFN(ubvec); + RIFN(options); + RIFN(edgecut); + RIFN(part); + + if (*wgtflag == 2 || *wgtflag == 3) + RIFN(elmwgt); + + + /* Check that the supplied information is actually valid/reasonable */ + gkMPI_Comm_rank(*comm, &mype); + if (elmdist[mype+1]-elmdist[mype] < 1) { + printf("PARMETIS ERROR: Poor initial element distribution. " + "Processor %"PRIDX" has no elements assigned to it!\n", mype); + return 0; } - /**************************************/ + RIFNP(ncon); + RIFNP(nparts); + + + for (j=0; j<*ncon; j++) { + sum = rsum(*nparts, tpwgts+j, *ncon); + if (sum < 0.999 || sum > 1.001) { + printf("PARMETIS ERROR: The sum of tpwgts for constraint #%"PRIDX" is not 1.0\n", j); + return 0; + } + } + for (j=0; j<*ncon; j++) { + for (i=0; i<*nparts; i++) { + if (tpwgts[i*(*ncon)+j] < 0.0 || tpwgts[i] > 1.001) { + printf("PARMETIS ERROR: The tpwgts for constraint #%"PRIDX" and partition #%"PRIDX" is out of bounds.\n", j, i); + return 0; + } + } + } + + + for (j=0; j<*ncon; j++) { + if (ubvec[j] <= 1.0) { + printf("PARMETIS ERROR: The ubvec for constraint #%"PRIDX" must be > 1.0\n", j); + return 0; + } + } + + return 1; } + +/*************************************************************************/ +/*! 
This function computes a partitioning of a small graph + */ +/*************************************************************************/ +void PartitionSmallGraph(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, h, ncon, nparts, npes, mype; + idx_t moptions[METIS_NOPTIONS]; + idx_t me; + idx_t *mypart; + int lpecut[2], gpecut[2]; + graph_t *agraph; + idx_t *sendcounts, *displs; + real_t *gnpwgts, *lnpwgts; + + ncon = graph->ncon; + nparts = ctrl->nparts; + npes = ctrl->npes; + mype = ctrl->mype; + + WCOREPUSH; + + CommSetup(ctrl, graph); + graph->where = imalloc(graph->nvtxs+graph->nrecv, "PartitionSmallGraph: where"); + agraph = AssembleAdaptiveGraph(ctrl, graph); + mypart = iwspacemalloc(ctrl, agraph->nvtxs); + + METIS_SetDefaultOptions(moptions); + moptions[METIS_OPTION_SEED] = ctrl->sync + mype; + + METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, + agraph->vwgt, NULL, agraph->adjwgt, &nparts, ctrl->tpwgts, NULL, + moptions, &graph->mincut, mypart); + + lpecut[0] = graph->mincut; + lpecut[1] = mype; + gkMPI_Allreduce(lpecut, gpecut, 1, MPI_2INT, MPI_MINLOC, ctrl->comm); + graph->mincut = gpecut[0]; + + if (lpecut[1] == gpecut[1] && gpecut[1] != 0) + gkMPI_Send((void *)mypart, agraph->nvtxs, IDX_T, 0, 1, ctrl->comm); + if (lpecut[1] == 0 && gpecut[1] != 0) + gkMPI_Recv((void *)mypart, agraph->nvtxs, IDX_T, gpecut[1], 1, ctrl->comm, &ctrl->status); + + sendcounts = iwspacemalloc(ctrl, npes); + displs = iwspacemalloc(ctrl, npes); + + for (i=0; ivtxdist[i+1]-graph->vtxdist[i]; + displs[i] = graph->vtxdist[i]; + } + + gkMPI_Scatterv((void *)mypart, sendcounts, displs, IDX_T, + (void *)graph->where, graph->nvtxs, IDX_T, 0, ctrl->comm); + + lnpwgts = graph->lnpwgts = rmalloc(nparts*ncon, "lnpwgts"); + gnpwgts = graph->gnpwgts = rmalloc(nparts*ncon, "gnpwgts"); + rset(nparts*ncon, 0, lnpwgts); + for (i=0; invtxs; i++) { + me = graph->where[i]; + for (h=0; hnvwgt[i*ncon+h]; + } + gkMPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, REAL_T, 
MPI_SUM, ctrl->comm); + + FreeGraph(agraph); + + WCOREPOP; + + return; +} + diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/wspace.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/wspace.c new file mode 100644 index 00000000..b9f6d696 --- /dev/null +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/wspace.c @@ -0,0 +1,138 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * memory.c + * + * This file contains routines that deal with memory allocation + * + * Started 2/24/96 + * George + * + * $Id: wspace.c 10540 2011-07-11 15:42:13Z karypis $ + * + */ + +#include + + +/*************************************************************************/ +/*! This function allocate various pools of memory */ +/*************************************************************************/ +void AllocateWSpace(ctrl_t *ctrl, size_t nwords) +{ + ctrl->mcore = gk_mcoreCreate(nwords*sizeof(idx_t)); +} + + +/*************************************************************************/ +/*! This function allocates refinement-specific memory for the workspace */ +/*************************************************************************/ +void AllocateRefinementWorkSpace(ctrl_t *ctrl, idx_t nbrpoolsize) +{ + ctrl->nbrpoolsize = nbrpoolsize; + ctrl->nbrpoolcpos = 0; + ctrl->nbrpoolreallocs = 0; + + ctrl->cnbrpool = (cnbr_t *)gk_malloc(ctrl->nbrpoolsize*sizeof(cnbr_t), + "AllocateRefinementWorkSpace: cnbrpool"); + +} + + +/*************************************************************************/ +/*! 
This function de-allocate various pools of memory */ +/**************************************************************************/ +void FreeWSpace(ctrl_t *ctrl) +{ + ctrl->dbglvl = 0; + gk_mcoreDestroy(&ctrl->mcore, (ctrl->dbglvl&DBG_INFO)); + + if (ctrl->dbglvl&DBG_INFO) { + printf(" nbrpool statistics [pe:%"PRIDX"]\n" + " nbrpoolsize: %12zu nbrpoolcpos: %12zu\n" + " nbrpoolreallocs: %12zu\n\n", + ctrl->mype, ctrl->nbrpoolsize, ctrl->nbrpoolcpos, + ctrl->nbrpoolreallocs); + } + + gk_free((void **)&ctrl->cnbrpool, LTERM); + ctrl->nbrpoolsize = 0; + ctrl->nbrpoolcpos = 0; + +} + + +/*************************************************************************/ +/*! This function allocate space from the workspace/heap */ +/*************************************************************************/ +void *wspacemalloc(ctrl_t *ctrl, size_t nbytes) +{ + return gk_mcoreMalloc(ctrl->mcore, nbytes); +} + + +/*************************************************************************/ +/*! This function allocate space from the core */ +/*************************************************************************/ +idx_t *iwspacemalloc(ctrl_t *ctrl, size_t n) +{ + return (idx_t *)wspacemalloc(ctrl, n*sizeof(idx_t)); +} + +/*************************************************************************/ +/*! This function resets the cnbrpool */ +/*************************************************************************/ +void cnbrpoolReset(ctrl_t *ctrl) +{ + ctrl->nbrpoolcpos = 0; +} + + +/*************************************************************************/ +/*! 
This function gets the next free index from cnbrpool */ +/*************************************************************************/ +idx_t cnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs) +{ + nnbrs = gk_min(ctrl->nparts, nnbrs); + + ctrl->nbrpoolcpos += nnbrs; + + if (ctrl->nbrpoolcpos > ctrl->nbrpoolsize) { + ctrl->nbrpoolsize += gk_max(10*nnbrs, ctrl->nbrpoolsize/2); + + ctrl->cnbrpool = (cnbr_t *)gk_realloc(ctrl->cnbrpool, + ctrl->nbrpoolsize*sizeof(cnbr_t), "cnbrpoolGet: cnbrpool"); + ctrl->nbrpoolreallocs++; + } + + return ctrl->nbrpoolcpos - nnbrs; +} + + +/*************************************************************************/ +/*! This function allocate space from the core */ +/*************************************************************************/ +real_t *rwspacemalloc(ctrl_t *ctrl, size_t n) +{ + return (real_t *)wspacemalloc(ctrl, n*sizeof(real_t)); +} + + +/*************************************************************************/ +/*! This function allocate space from the core */ +/*************************************************************************/ +ikv_t *ikvwspacemalloc(ctrl_t *ctrl, size_t n) +{ + return (ikv_t *)wspacemalloc(ctrl, n*sizeof(ikv_t)); +} + + +/*************************************************************************/ +/*! 
This function allocate space from the core */ +/*************************************************************************/ +rkv_t *rkvwspacemalloc(ctrl_t *ctrl, size_t n) +{ + return (rkv_t *)wspacemalloc(ctrl, n*sizeof(rkv_t)); +} + + diff --git a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/xyzpart.c b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/xyzpart.c index 8e80bf89..0f1d746f 100644 --- a/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/xyzpart.c +++ b/Code/ThirdParty/parmetis_svfsi/simvascular_parmetis_svfsi/ParMETISLib/xyzpart.c @@ -8,26 +8,28 @@ * Started 7/11/97 * George * - * $Id: xyzpart.c,v 1.3 2003/07/30 18:37:59 karypis Exp $ + * $Id: xyzpart.c 10755 2011-09-15 12:28:34Z karypis $ * */ #include -/************************************************************************* -* This function implements a simple coordinate based partitioning -**************************************************************************/ -void Coordinate_Partition(CtrlType *ctrl, GraphType *graph, int ndims, float *xyz, - int setup, WorkSpaceType *wspace) +/*************************************************************************/ +/*! 
This function implements a simple coordinate based partitioning +*/ +/*************************************************************************/ +void Coordinate_Partition(ctrl_t *ctrl, graph_t *graph, idx_t ndims, + real_t *xyz, idx_t setup) { - int i, j, k, nvtxs, firstvtx, icoord, coords[3]; - idxtype *vtxdist; - float max[3], min[3], gmin[3], gmax[3], shift[3], scale[3]; - KeyValueType *cand; + idx_t i, j, k, nvtxs, firstvtx, icoord, nbits; + idx_t *vtxdist, *bxyz; + ikv_t *cand; + + WCOREPUSH; if (setup) - SetUp(ctrl, graph, wspace); + CommSetup(ctrl, graph); else graph->nrecv = 0; @@ -35,74 +37,262 @@ void Coordinate_Partition(CtrlType *ctrl, GraphType *graph, int ndims, float *xy vtxdist = graph->vtxdist; firstvtx = vtxdist[ctrl->mype]; - cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "Coordinate_Partition: cand"); + cand = ikvwspacemalloc(ctrl, nvtxs); + bxyz = iwspacemalloc(ctrl, nvtxs*ndims); - /* Compute parameters for coordinate transformation */ - for (k=0; k max[k]) - max[k] = xyz[i*ndims+k]; + for (icoord=0, j=nbits-1; j>=0; j--) { + for (k=0; kcomm); - MPI_Allreduce((void *)max, (void *)gmax, ndims, MPI_FLOAT, MPI_MAX, ctrl->comm); + /* Partition using sorting */ + PseudoSampleSort(ctrl, graph, cand); - /* myprintf(ctrl, "Coordinate Range: %e %e, Global %e %e\n", min[0], max[0], gmin[0], gmax[0]); */ + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function maps the coordinates into bin numbers. + It starts with a uniform distribution of the max-min range and then + performs a number of iterations that adjust the bucket boundaries based + on the actual bucket counts. 
+*/ +/*************************************************************************/ +void IRBinCoordinates(ctrl_t *ctrl, graph_t *graph, idx_t ndims, real_t *xyz, + idx_t nbins, idx_t *bxyz) +{ + idx_t npes=ctrl->npes, mype=ctrl->mype; + idx_t i, j, k, l, gnvtxs, nvtxs; + idx_t csize, psize; + idx_t *vtxdist, *lcounts, *gcounts; + real_t gmin, gmax, *emarkers, *nemarkers; + rkv_t *cand; + WCOREPUSH; + + gnvtxs = graph->gnvtxs; + nvtxs = graph->nvtxs; + + cand = rkvwspacemalloc(ctrl, nvtxs); + lcounts = iwspacemalloc(ctrl, nbins); + gcounts = iwspacemalloc(ctrl, nbins); + emarkers = rwspacemalloc(ctrl, nbins+1); + nemarkers = rwspacemalloc(ctrl, nbins+1); + + + /* Go over each dimension */ for (k=0; kcomm); + gkMPI_Allreduce((void *)&cand[nvtxs-1].key, (void *)&gmax, 1, REAL_T, MPI_MAX, ctrl->comm); + + for (i=0; icomm); - switch (ctrl->xyztype) { - case XYZ_XCOORD: - for (i=0; i=0 && cand[i].key<=1000000); - cand[i].val = firstvtx+i; + /* + if (mype == 0) { + printf("Distribution [%"PRIDX"]...\n", l); + for (i=0; i %"PRIDX"\n", emarkers[i], emarkers[i+1], gcounts[i]); } - break; - case XYZ_SPFILL: - for (i=0; i=0; j--) { - for (k=0; knpes, mype=ctrl->mype; + idx_t i, j, k, l, gnvtxs, nvtxs, cnbins; + idx_t *vtxdist, *lcounts, *gcounts; + real_t sum, gmin, gmax, gsum, *emarkers, *nemarkers, *lsums, *gsums; + rkv_t *cand; + ikv_t *buckets; -} + WCOREPUSH; + + gnvtxs = graph->gnvtxs; + nvtxs = graph->nvtxs; + buckets = ikvwspacemalloc(ctrl, nbins); + cand = rkvwspacemalloc(ctrl, nvtxs); + lcounts = iwspacemalloc(ctrl, nbins); + gcounts = iwspacemalloc(ctrl, nbins); + lsums = rwspacemalloc(ctrl, nbins); + gsums = rwspacemalloc(ctrl, nbins); + emarkers = rwspacemalloc(ctrl, nbins+1); + nemarkers = rwspacemalloc(ctrl, nbins+1); + + + /* Go over each dimension */ + for (k=0; kcomm); + gkMPI_Allreduce((void *)&cand[nvtxs-1].key, (void *)&gmax, 1, REAL_T, MPI_MAX, ctrl->comm); + gkMPI_Allreduce((void *)&sum, (void *)&gsum, 1, REAL_T, MPI_MAX, ctrl->comm); + + emarkers[0] = 
gmin; + emarkers[1] = gsum/gnvtxs; + emarkers[2] = gmax*(1.0+2.0*REAL_EPSILON); + cnbins = 2; + + /* get into a iterative backet boundary refinement */ + while (cnbins < nbins) { + /* determine bucket counts */ + iset(cnbins, 0, lcounts); + rset(cnbins, 0, lsums); + for (j=0, i=0; icomm); + gkMPI_Allreduce((void *)lsums, (void *)gsums, cnbins, REAL_T, MPI_SUM, ctrl->comm); + + /* + if (mype == 0) { + printf("Distribution [%"PRIDX"]...\n", cnbins); + for (i=0; i %"PRIDX"\n", emarkers[i], emarkers[i+1], gcounts[i]); + } + */ + + + /* split over-weight buckets */ + for (i=0; i=0; i--, j++) { + l = buckets[i].val; + if (buckets[i].key > gnvtxs/nbins && cnbins < nbins) { + /* + if (mype == 0) + printf("\t\t %f %f\n", (float)emarkers[l], (float)emarkers[l+1]); + */ + nemarkers[j++] = (emarkers[l]+emarkers[l+1])/2; + cnbins++; + } + nemarkers[j] = emarkers[l]; + } + PASSERT(ctrl, cnbins == j); + + rsorti(cnbins, nemarkers); + rcopy(cnbins, nemarkers, emarkers); + emarkers[cnbins] = gmax*(1.0+2.0*REAL_EPSILON); + } + + /* assign the coordinate to the appropriate bin */ + for (j=0, i=0; inpes, mype=ctrl->mype, firstvtx, lastvtx; - idxtype *scounts, *rcounts, *vtxdist, *perm; - KeyValueType *relmnts, *mypicks, *allpicks; + idx_t i, j, k, nvtxs, nrecv, npes=ctrl->npes, mype=ctrl->mype, + firstvtx, lastvtx; + idx_t *scounts, *rcounts, *vtxdist, *perm; + ikv_t *relmnts, *mypicks, *allpicks; + + WCOREPUSH; + + CommUpdateNnbrs(ctrl, npes); nvtxs = graph->nvtxs; vtxdist = graph->vtxdist; - scounts = wspace->pv1; - rcounts = wspace->pv2; + /* get memory for the counts */ + scounts = iwspacemalloc(ctrl, npes+1); + rcounts = iwspacemalloc(ctrl, npes+1); - /* Allocate memory for the splitters */ - mypicks = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*(npes+1), "ParSort: mypicks"); - allpicks = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*npes*npes, "ParSort: allpicks"); + /* get memory for the splitters */ + mypicks = ikvwspacemalloc(ctrl, npes+1); + WCOREPUSH; /* for freeing 
allpicks */ + allpicks = ikvwspacemalloc(ctrl, npes*npes); /* Sort the local elements */ - ikeysort(nvtxs, elmnts); + ikvsorti(nvtxs, elmnts); /* Select the local npes-1 equally spaced elements */ for (i=1; icomm); + gkMPI_Allgather((void *)mypicks, 2*(npes-1), IDX_T, (void *)allpicks, + 2*(npes-1), IDX_T, ctrl->comm); /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */ /* Sort all the picks */ - ikeyvalsort(npes*(npes-1), allpicks); + ikvsortii(npes*(npes-1), allpicks); /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */ /* Select the final splitters. Set the boundaries to simplify coding */ for (i=1; icomm); + gkMPI_Alltoall(scounts, 1, IDX_T, rcounts, 1, IDX_T, ctrl->comm); + + MAKECSR(i, npes, scounts); + MAKECSR(i, npes, rcounts); /* - PrintVector(ctrl, npes, 0, scounts, "Scounts"); - PrintVector(ctrl, npes, 0, rcounts, "Rcounts"); + PrintVector(ctrl, npes+1, 0, scounts, "Scounts"); + PrintVector(ctrl, npes+1, 0, rcounts, "Rcounts"); */ /* Allocate memory for sorted elements and receive them */ - MAKECSR(i, npes, scounts); - MAKECSR(i, npes, rcounts); - nrecv = rcounts[npes]; - if (wspace->nlarge >= nrecv) - relmnts = (KeyValueType *)wspace->pairs; - else - relmnts = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nrecv, "ParSort: relmnts"); + nrecv = rcounts[npes]; + relmnts = ikvwspacemalloc(ctrl, nrecv); /* Issue the receives first */ for (i=0; icomm, ctrl->rreq+i); + gkMPI_Irecv((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), + IDX_T, i, 1, ctrl->comm, ctrl->rreq+i); /* Issue the sends next */ for (i=0; icomm, ctrl->sreq+i); + gkMPI_Isend((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), + IDX_T, i, 1, ctrl->comm, ctrl->sreq+i); - MPI_Waitall(npes, ctrl->rreq, ctrl->statuses); - MPI_Waitall(npes, ctrl->sreq, ctrl->statuses); + gkMPI_Waitall(npes, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(npes, ctrl->sreq, ctrl->statuses); /* OK, now do the local sort of the relmnts. 
Use perm to keep track original order */ - perm = idxmalloc(nrecv, "ParSort: perm"); + perm = iwspacemalloc(ctrl, nrecv); for (i=0; icomm); + gkMPI_Scan((void *)(&nrecv), (void *)(&lastvtx), 1, IDX_T, MPI_SUM, ctrl->comm); firstvtx = lastvtx-nrecv; - /*myprintf(ctrl, "first, last: %d %d\n", firstvtx, lastvtx); */ + /*myprintf(ctrl, "first, last: %"PRIDX" %"PRIDX"\n", firstvtx, lastvtx); */ for (j=0, i=0; i firstvtx) { /* Found the first PE that is passed me */ if (vtxdist[i+1] >= lastvtx) { - /* myprintf(ctrl, "Shifting %d elements to processor %d\n", lastvtx-firstvtx, i); */ + /* myprintf(ctrl, "Shifting %"PRIDX" elements to processor %"PRIDX"\n", lastvtx-firstvtx, i); */ for (k=0; k=0 && relmnts[i].key=0 && relmnts[i].keycomm, ctrl->rreq+i); + gkMPI_Irecv((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), IDX_T, + i, 1, ctrl->comm, ctrl->rreq+i); /* Issue the sends next */ for (i=0; icomm, ctrl->sreq+i); + gkMPI_Isend((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), IDX_T, + i, 1, ctrl->comm, ctrl->sreq+i); - MPI_Waitall(npes, ctrl->rreq, ctrl->statuses); - MPI_Waitall(npes, ctrl->sreq, ctrl->statuses); + gkMPI_Waitall(npes, ctrl->rreq, ctrl->statuses); + gkMPI_Waitall(npes, ctrl->sreq, ctrl->statuses); /* Construct a partition for the graph */ - graph->where = idxmalloc(graph->nvtxs+graph->nrecv, "PartSort: graph->where"); + graph->where = imalloc(graph->nvtxs+graph->nrecv, "PartSort: graph->where"); firstvtx = vtxdist[mype]; for (i=0; i=0 && elmnts[i].key=vtxdist[mype] && elmnts[i].val=0 && elmnts[i].key=vtxdist[mype] && elmnts[i].valwhere[elmnts[i].val-firstvtx] = elmnts[i].key; } + WCOREPOP; +} + + +/**************************************************************************/ +/*! This function sorts a distributed list of ikv_t in increasing + order, and uses it to compute a partition. It uses a + samplesort variant whose number of local samples can potentially + be smaller than npes. 
+*/ +/**************************************************************************/ +void PseudoSampleSort(ctrl_t *ctrl, graph_t *graph, ikv_t *elmnts) +{ + idx_t npes=ctrl->npes, mype=ctrl->mype; + idx_t i, j, k, nlsamples, ntsamples, nvtxs, nrecv, firstvtx, lastvtx; + idx_t *scounts, *rcounts, *sdispls, *rdispls, *vtxdist, *perm; + ikv_t *relmnts, *mypicks, *allpicks; + +STARTTIMER(ctrl, ctrl->AuxTmr1); + + WCOREPUSH; + + nvtxs = graph->nvtxs; + vtxdist = graph->vtxdist; + + /* determine the number of local samples */ + //nlsamples = (GlobalSESum(ctrl, graph->nedges) + graph->gnvtxs)/(npes*npes); + nlsamples = graph->gnvtxs/(npes*npes); + if (nlsamples > npes) + nlsamples = npes; + else if (nlsamples < 75) + nlsamples = gk_min(75, npes); /* the 'npes' in the min is to account for small graphs */ + + + IFSET(ctrl->dbglvl, DBG_INFO, + rprintf(ctrl, "PseudoSampleSort: nlsamples=%"PRIDX" of %"PRIDX"\n", nlsamples, npes)); + + /* get memory for the counts and displacements */ + scounts = iwspacemalloc(ctrl, npes+1); + rcounts = iwspacemalloc(ctrl, npes+1); + sdispls = iwspacemalloc(ctrl, npes+1); + rdispls = iwspacemalloc(ctrl, npes+1); + + /* get memory for the splitters */ + mypicks = ikvwspacemalloc(ctrl, npes+1); + + WCOREPUSH; /* for freeing allpicks */ + allpicks = ikvwspacemalloc(ctrl, npes*nlsamples); + + /* Sort the local elements */ + ikvsorti(nvtxs, elmnts); + + /* Select the local nlsamples-1 equally spaced elements */ + for (i=0; i 0) { + k = (nvtxs/(3*nlsamples) /* initial offset */ + + i*nvtxs/nlsamples /* increament */ + + mype*nvtxs/(npes*nlsamples) /* per-pe shift for nlsamplesAuxTmr1); +STARTTIMER(ctrl, ctrl->AuxTmr2); + + /* Gather the picks to all the processors */ + gkMPI_Allgather((void *)mypicks, 2*(nlsamples-1), IDX_T, (void *)allpicks, + 2*(nlsamples-1), IDX_T, ctrl->comm); + + /* PrintPairs(ctrl, npes*(nlsamples-1), allpicks, "Allpicks"); */ + + /* Remove any samples that have .val == -1 */ + for (ntsamples=0, i=0; iAuxTmr2); +STARTTIMER(ctrl, 
ctrl->AuxTmr3); + + /* Compute the number of elements that belong to each bucket */ + iset(npes, 0, scounts); + for (j=i=0; icomm); + + /* multiply raw counts by 2 to account for the ikv_t type */ + sdispls[0] = rdispls[0] = 0; + for (i=0; iAuxTmr3); +STARTTIMER(ctrl, ctrl->AuxTmr4); + + /* + PrintVector(ctrl, npes+1, 0, scounts, "Scounts"); + PrintVector(ctrl, npes+1, 0, rcounts, "Rcounts"); + */ + + /* Allocate memory for sorted elements and receive them */ + nrecv = rdispls[npes]/2; /* The divide by 2 is to get the # of ikv_t elements */ + relmnts = ikvwspacemalloc(ctrl, nrecv); + + IFSET(ctrl->dbglvl, DBG_INFO, + rprintf(ctrl, "PseudoSampleSort: max_nrecv: %"PRIDX" of %"PRIDX"\n", + GlobalSEMax(ctrl, nrecv), graph->gnvtxs/npes)); + if (mype == 0 || mype == npes-1) + IFSET(ctrl->dbglvl, DBG_INFO, + myprintf(ctrl, "PseudoSampleSort: nrecv: %"PRIDX" of %"PRIDX"\n", + nrecv, graph->gnvtxs/npes)); + + + gkMPI_Alltoallv((void *)elmnts, scounts, sdispls, IDX_T, + (void *)relmnts, rcounts, rdispls, IDX_T, + ctrl->comm); - GKfree((void **)&mypicks, (void **)&allpicks, (void **)&perm, LTERM); - if (wspace->nlarge < nrecv) - GKfree((void **)&relmnts, LTERM); +STOPTIMER(ctrl, ctrl->AuxTmr4); +STARTTIMER(ctrl, ctrl->AuxTmr5); + /* OK, now do the local sort of the relmnts. Use perm to keep track original order */ + perm = iwspacemalloc(ctrl, nrecv); + for (i=0; icomm); + firstvtx = lastvtx-nrecv; + + for (j=0, i=0; i firstvtx) { /* Found the first PE that is passed me */ + if (vtxdist[i+1] >= lastvtx) { + /* myprintf(ctrl, "Shifting %"PRIDX" elements to processor %"PRIDX"\n", lastvtx-firstvtx, i); */ + for (k=0; k= lastvtx) + break; + } + + /* Reverse the ordering on the relmnts[].val */ + for (i=0; i=0 && relmnts[i].keyAuxTmr5); +STARTTIMER(ctrl, ctrl->AuxTmr6); + + /* OK, now sent it back. The role of send/recv arrays is now reversed. 
*/ + gkMPI_Alltoallv((void *)relmnts, rcounts, rdispls, IDX_T, + (void *)elmnts, scounts, sdispls, IDX_T, + ctrl->comm); + + + /* Construct a partition for the graph */ + graph->where = imalloc(graph->nvtxs+graph->nrecv, "PartSort: graph->where"); + firstvtx = vtxdist[mype]; + for (i=0; i=0 && elmnts[i].key=vtxdist[mype] && elmnts[i].valwhere[elmnts[i].val-firstvtx] = elmnts[i].key; + } + + WCOREPOP; + +STOPTIMER(ctrl, ctrl->AuxTmr6); } +