diff --git a/Makefile b/Makefile index 6662b44f8..c98adc179 100644 --- a/Makefile +++ b/Makefile @@ -94,9 +94,24 @@ OBJ := $(filter-out $(BUILD_DIR)/nvmon.o,$(OBJ)) OBJ := $(filter-out $(BUILD_DIR)/topology_gpu.o,$(OBJ)) OBJ := $(filter-out $(BUILD_DIR)/libnvctr.o,$(OBJ)) endif +ifeq ($(COMPILER),GCCPOWER) +OBJ := $(filter-out $(BUILD_DIR)/topology_cpuid.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86_msr.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86_pci.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/loadData.o,$(OBJ)) +endif +ifeq ($(COMPILER),XLC) +OBJ := $(filter-out $(BUILD_DIR)/topology_cpuid.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86_msr.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86_pci.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/loadData.o,$(OBJ)) +endif PERFMONHEADERS = $(patsubst $(SRC_DIR)/includes/%.txt, $(BUILD_DIR)/%.h,$(wildcard $(SRC_DIR)/includes/*.txt)) OBJ_LUA = $(wildcard ./ext/lua/$(COMPILER)/*.o) OBJ_HWLOC = $(wildcard ./ext/hwloc/$(COMPILER)/*.o) +OBJ_GOTCHA = $(wildcard ./ext/GOTCHA/$(COMPILER)/*.o) FILTERS := $(filter-out ./filters/README,$(wildcard ./filters/*)) @@ -118,12 +133,12 @@ endif CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES) ifeq ($(BUILDDAEMON),false) -all: $(BUILD_DIR) $(PERFMONHEADERS) $(OBJ) $(TARGET_LIB) $(FORTRAN_IF) $(PINLIB) $(L_APPS) $(L_HELPER) $(FREQ_TARGET) $(BENCH_TARGET) +all: $(BUILD_DIR) $(PERFMONHEADERS) $(OBJ) $(TARGET_LIB) $(FORTRAN_IF) $(PINLIB) $(L_APPS) $(L_HELPER) $(FREQ_TARGET) $(BENCH_TARGET) $(APPDAEMON_TARGET) else ifeq ($(BUILDFREQ),false) -all: $(BUILD_DIR) $(PERFMONHEADERS) $(OBJ) $(TARGET_LIB) $(FORTRAN_IF) $(PINLIB) $(L_APPS) $(L_HELPER) $(DAEMON_TARGET) $(BENCH_TARGET) +all: $(BUILD_DIR) $(PERFMONHEADERS) $(OBJ) $(TARGET_LIB) $(FORTRAN_IF) $(PINLIB) $(L_APPS) $(L_HELPER) $(DAEMON_TARGET) $(BENCH_TARGET) $(APPDAEMON_TARGET) else -all: $(BUILD_DIR) 
$(PERFMONHEADERS) $(OBJ) $(TARGET_LIB) $(FORTRAN_IF) $(PINLIB) $(L_APPS) $(L_HELPER) $(DAEMON_TARGET) $(FREQ_TARGET) $(BENCH_TARGET) +all: $(BUILD_DIR) $(PERFMONHEADERS) $(OBJ) $(TARGET_LIB) $(FORTRAN_IF) $(PINLIB) $(L_APPS) $(L_HELPER) $(DAEMON_TARGET) $(FREQ_TARGET) $(BENCH_TARGET) $(APPDAEMON_TARGET) endif endif @@ -185,10 +200,14 @@ $(DAEMON_TARGET): $(SRC_DIR)/access-daemon/accessDaemon.c @echo "===> BUILD access daemon likwid-accessD" $(Q)$(MAKE) -C $(SRC_DIR)/access-daemon likwid-accessD -$(FREQ_TARGET): $(SRC_DIR)/access-daemon/setFreq.c +$(FREQ_TARGET): $(SRC_DIR)/access-daemon/setFreqDaemon.c @echo "===> BUILD frequency daemon likwid-setFreq" $(Q)$(MAKE) -C $(SRC_DIR)/access-daemon likwid-setFreq +$(APPDAEMON_TARGET): $(SRC_DIR)/access-daemon/appDaemon.c $(TARGET_GOTCHA_LIB) + @echo "===> BUILD application interface likwid-appDaemon.so" + $(Q)$(MAKE) -C $(SRC_DIR)/access-daemon likwid-appDaemon.so + $(BUILD_DIR): @mkdir $(BUILD_DIR) @@ -215,6 +234,11 @@ $(TARGET_LUA_LIB): @echo "===> EXTERNAL LUA" endif +$(TARGET_GOTCHA_LIB): + @echo "===> ENTER $(GOTCHA_FOLDER)" + $(Q)$(MAKE) --no-print-directory -C $(GOTCHA_FOLDER) $(MAKECMDGOALS) + + $(TARGET_HWLOC_LIB): @echo "===> ENTER $(HWLOC_FOLDER)" $(Q)$(MAKE) --no-print-directory -C $(HWLOC_FOLDER) $(MAKECMDGOALS) @@ -248,13 +272,13 @@ ifeq ($(findstring $(MAKECMDGOALS),clean),) -include $(OBJ:.o=.d) endif -.PHONY: clean distclean install uninstall help $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(BENCH_TARGET) +.PHONY: clean distclean install uninstall help $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(TARGET_GOTCHA_LIB) $(BENCH_TARGET) .PRECIOUS: $(BUILD_DIR)/%.pas .NOTPARALLEL: -clean: $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(BENCH_TARGET) +clean: $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(TARGET_GOTCHA_LIB) $(BENCH_TARGET) @echo "===> CLEAN" @for APP in $(L_APPS); do \ rm -f $$APP; \ @@ -264,10 +288,10 @@ clean: $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(BENCH_TARGET) @rm -f $(DYNAMIC_TARGET_LIB)* @rm -f $(PINLIB)* @rm -f 
$(FORTRAN_IF_NAME) - @rm -f $(FREQ_TARGET) $(DAEMON_TARGET) + @rm -f $(FREQ_TARGET) $(DAEMON_TARGET) $(APPDAEMON_TARGET) @rm -f likwid-config.cmake -distclean: $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(BENCH_TARGET) +distclean: $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(TARGET_GOTCHA_LIB) $(BENCH_TARGET) @echo "===> DIST CLEAN" @for APP in $(L_APPS); do \ rm -f $$APP; \ @@ -277,10 +301,11 @@ distclean: $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(BENCH_TARGET) @rm -f $(DYNAMIC_TARGET_LIB)* @rm -f $(PINLIB)* @rm -f $(FORTRAN_IF_NAME) - @rm -f $(FREQ_TARGET) $(DAEMON_TARGET) + @rm -f $(FREQ_TARGET) $(DAEMON_TARGET) $(APPDAEMON_TARGET) @rm -rf $(BUILD_DIR) @rm -rf $(TARGET_LUA_LIB).* $(shell basename $(TARGET_LUA_LIB)).* @rm -rf $(TARGET_HWLOC_LIB).* $(shell basename $(TARGET_HWLOC_LIB)).* + @rm -rf $(TARGET_GOTCHA_LIB).* $(shell basename $(TARGET_GOTCHA_LIB)).* @rm -f $(GENGROUPLOCK) @rm -f likwid-config.cmake @rm -rf doc/html @@ -360,7 +385,33 @@ uninstall_freq_moved: @echo "===> No UNINSTALL of setFrequencies tool" endif -install: install_daemon install_freq +ifeq ($(BUILDAPPDAEMON),true) +install_appdaemon: + @echo "===> INSTALL application interface appDaemon to $(PREFIX)/lib/$(APPDAEMON_TARGET)" + @mkdir -p $(PREFIX)/lib + @install -m 755 $(INSTALL_CHOWN) $(APPDAEMON_TARGET) $(PREFIX)/lib/$(APPDAEMON_TARGET) +move_appdaemon: + @echo "===> MOVE application interface appDaemon from $(PREFIX)/lib/$(APPDAEMON_TARGET) to $(INSTALLED_PREFIX)/lib/$(APPDAEMON_TARGET)" + @mkdir -p $(INSTALLED_PREFIX)/lib + @install -m 755 $(INSTALL_CHOWN) $(PREFIX)/lib/$(APPDAEMON_TARGET) $(INSTALLED_PREFIX)/lib/$(APPDAEMON_TARGET) +uninstall_appdaemon: + @echo "===> REMOVING application interface appDaemon from $(PREFIX)/lib/$(APPDAEMON_TARGET)" + @rm -f $(PREFIX)/lib/$(APPDAEMON_TARGET) +uninstall_appdaemon_moved: + @echo "===> REMOVING application interface appDaemon from $(INSTALLED_PREFIX)/lib/$(APPDAEMON_TARGET)" + @rm -f $(INSTALLED_PREFIX)/lib/$(APPDAEMON_TARGET) +else +install_appdaemon: 
+ @echo "===> No INSTALL of the application interface appDaemon" +move_appdaemon: + @echo "===> No MOVE of the application interface appDaemon" +uninstall_appdaemon: + @echo "===> No UNINSTALL of the application interface appDaemon" +uninstall_appdaemon_moved: + @echo "===> No UNINSTALL of the application interface appDaemon" +endif + +install: install_daemon install_freq install_appdaemon @echo "===> INSTALL applications to $(BINPREFIX)" @mkdir -p $(BINPREFIX) @chmod 755 $(BINPREFIX) @@ -388,6 +439,7 @@ install: install_daemon install_freq @if [ "$(LUA_INTERNAL)" = "true" ]; then \ install -m 755 $(TARGET_LUA_LIB) $(LIBPREFIX)/$(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE); \ fi + @install -m 755 $(GOTCHA_FOLDER)/$(TARGET_GOTCHA_LIB) $(LIBPREFIX)/$(TARGET_GOTCHA_LIB).$(VERSION).$(RELEASE) @cd $(LIBPREFIX) && ln -fs $(TARGET_LIB).$(VERSION).$(RELEASE) $(TARGET_LIB) @cd $(LIBPREFIX) && ln -fs $(TARGET_LIB).$(VERSION).$(RELEASE) $(TARGET_LIB).$(VERSION) @cd $(LIBPREFIX) && ln -fs $(PINLIB).$(VERSION).$(RELEASE) $(PINLIB) @@ -398,6 +450,8 @@ install: install_daemon install_freq cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_LUA_LIB)); \ cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_LUA_LIB)).$(VERSION); \ fi + @cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_GOTCHA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_GOTCHA_LIB)) + @cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_GOTCHA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_GOTCHA_LIB)).$(VERSION) @echo "===> INSTALL man pages to $(MANPREFIX)/man1" @mkdir -p $(MANPREFIX)/man1 @chmod 755 $(MANPREFIX)/man1 @@ -421,6 +475,7 @@ install: install_daemon install_freq @mkdir -p $(PREFIX)/include @chmod 755 $(PREFIX)/include @install -m 644 src/includes/likwid.h $(PREFIX)/include/ + @install -m 644 src/includes/likwid-marker.h $(PREFIX)/include/ @install -m 644 
src/includes/bstrlib.h $(PREFIX)/include/ $(FORTRAN_INSTALL) @echo "===> INSTALL groups to $(PREFIX)/share/likwid/perfgroups" @@ -445,7 +500,7 @@ install: install_daemon install_freq done @install -m 644 likwid-config.cmake $(LIBPREFIX) -move: move_daemon move_freq +move: move_daemon move_freq move_appdaemon @echo "===> MOVE applications from $(BINPREFIX) to $(INSTALLED_BINPREFIX)" @mkdir -p $(INSTALLED_BINPREFIX) @chmod 755 $(INSTALLED_BINPREFIX) @@ -469,6 +524,7 @@ move: move_daemon move_freq @install -m 755 $(LIBPREFIX)/$(PINLIB).$(VERSION).$(RELEASE) $(INSTALLED_LIBPREFIX)/$(PINLIB).$(VERSION).$(RELEASE) @install -m 755 $(LIBPREFIX)/$(shell basename $(TARGET_HWLOC_LIB)).$(VERSION).$(RELEASE) $(INSTALLED_LIBPREFIX)/$(shell basename $(TARGET_HWLOC_LIB)).$(VERSION).$(RELEASE) @install -m 755 $(LIBPREFIX)/$(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(INSTALLED_LIBPREFIX)/$(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) + @install -m 755 $(LIBPREFIX)/$(shell basename $(TARGET_GOTCHA_LIB)).$(VERSION).$(RELEASE) $(INSTALLED_LIBPREFIX)/$(shell basename $(TARGET_GOTCHA_LIB)).$(VERSION).$(RELEASE) @cd $(INSTALLED_LIBPREFIX) && ln -fs $(TARGET_LIB).$(VERSION).$(RELEASE) $(TARGET_LIB) @cd $(INSTALLED_LIBPREFIX) && ln -fs $(TARGET_LIB).$(VERSION).$(RELEASE) $(TARGET_LIB).$(VERSION) @cd $(INSTALLED_LIBPREFIX) && ln -fs $(PINLIB).$(VERSION).$(RELEASE) $(PINLIB) @@ -477,6 +533,8 @@ move: move_daemon move_freq @cd $(INSTALLED_LIBPREFIX) && ln -fs $(shell basename $(TARGET_HWLOC_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_HWLOC_LIB)).$(VERSION) @cd $(INSTALLED_LIBPREFIX) && ln -fs $(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_LUA_LIB)) @cd $(INSTALLED_LIBPREFIX) && ln -fs $(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_LUA_LIB)).$(VERSION) + @cd $(INSTALLED_LIBPREFIX) && ln -fs $(shell basename $(TARGET_GOTCHA_LIB)).$(VERSION).$(RELEASE) $(shell basename 
$(TARGET_GOTCHA_LIB)) + @cd $(INSTALLED_LIBPREFIX) && ln -fs $(shell basename $(TARGET_GOTCHA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_GOTCHA_LIB)).$(VERSION) @echo "===> MOVE man pages from $(MANPREFIX)/man1 to $(INSTALLED_MANPREFIX)/man1" @mkdir -p $(INSTALLED_MANPREFIX)/man1 @chmod 755 $(INSTALLED_MANPREFIX)/man1 @@ -485,6 +543,7 @@ move: move_daemon move_freq @mkdir -p $(INSTALLED_PREFIX)/include @chmod 755 $(INSTALLED_PREFIX)/include @install -m 644 $(PREFIX)/include/likwid.h $(INSTALLED_PREFIX)/include/likwid.h + @install -m 644 $(PREFIX)/include/likwid-marker.h $(INSTALLED_PREFIX)/include/likwid-marker.h @install -m 644 $(PREFIX)/include/bstrlib.h $(INSTALLED_PREFIX)/include/bstrlib.h @if [ -e $(PREFIX)/include/likwid.mod ]; then install $(PREFIX)/include/likwid.mod $(INSTALLED_PREFIX)/include/likwid.mod; fi @echo "===> MOVE groups from $(PREFIX)/share/likwid/perfgroups to $(INSTALLED_PREFIX)/share/likwid/perfgroups" @@ -507,7 +566,7 @@ move: move_daemon move_freq @chmod 755 $(LIKWIDFILTERPATH)/* @install -m 644 $(LIBPREFIX)/likwid-config.cmake $(INSTALLED_LIBPREFIX) -uninstall: uninstall_daemon uninstall_freq +uninstall: uninstall_daemon uninstall_freq uninstall_appdaemon @echo "===> REMOVING applications from $(PREFIX)/bin" @rm -f $(addprefix $(BINPREFIX)/,$(addsuffix .lua,$(L_APPS))) @for APP in $(L_APPS); do \ @@ -523,6 +582,7 @@ uninstall: uninstall_daemon uninstall_freq @rm -rf $(PREFIX)/share/lua/likwid.lua @echo "===> REMOVING libs from $(LIBPREFIX)" @rm -f $(LIBPREFIX)/liblikwid* + @rm -f $(LIBPREFIX)/$(TARGET_GOTCHA_LIB) @echo "===> REMOVING man pages from $(MANPREFIX)/man1" @rm -f $(addprefix $(MANPREFIX)/man1/,$(addsuffix .1,$(L_APPS))) @rm -f $(MANPREFIX)/man1/feedGnuplot.1 @@ -532,6 +592,7 @@ uninstall: uninstall_daemon uninstall_freq @rm -f $(MANPREFIX)/man1/likwid-bench.1 @echo "===> REMOVING header from $(PREFIX)/include" @rm -f $(PREFIX)/include/likwid.h + @rm -f $(PREFIX)/include/likwid-marker.h @rm -f 
$(PREFIX)/include/bstrlib.h $(FORTRAN_REMOVE) @echo "===> REMOVING filter, groups and default configs from $(PREFIX)/share/likwid" @@ -542,7 +603,7 @@ uninstall: uninstall_daemon uninstall_freq @rm -rf $(PREFIX)/share/likwid @rm -rf $(LIBPREFIX)/likwid-config.cmake -uninstall_moved: uninstall_daemon_moved uninstall_freq_moved +uninstall_moved: uninstall_daemon_moved uninstall_freq_moved uninstall_appdaemon_moved @echo "===> REMOVING applications from $(INSTALLED_PREFIX)/bin" @rm -f $(addprefix $(INSTALLED_BINPREFIX)/,$(addsuffix .lua,$(L_APPS))) @for APP in $(L_APPS); do \ @@ -558,6 +619,7 @@ uninstall_moved: uninstall_daemon_moved uninstall_freq_moved @rm -rf $(INSTALLED_PREFIX)/share/lua/likwid.lua @echo "===> REMOVING libs from $(INSTALLED_LIBPREFIX)" @rm -f $(INSTALLED_LIBPREFIX)/liblikwid* + @rm -f $(INSTALLED_LIBPREFIX)/$(TARGET_GOTCHA_LIB) @echo "===> REMOVING man pages from $(INSTALLED_MANPREFIX)/man1" @rm -f $(addprefix $(INSTALLED_MANPREFIX)/man1/,$(addsuffix .1,$(L_APPS))) @rm -f $(INSTALLED_MANPREFIX)/man1/feedGnuplot.1 @@ -567,6 +629,7 @@ uninstall_moved: uninstall_daemon_moved uninstall_freq_moved @rm -f $(INSTALLED_MANPREFIX)/man1/likwid-bench.1 @echo "===> REMOVING header from $(INSTALLED_PREFIX)/include" @rm -f $(INSTALLED_PREFIX)/include/likwid.h + @rm -f $(INSTALLED_PREFIX)/include/likwid-marker.h @rm -f $(INSTALLED_PREFIX)/include/bstrlib.h $(FORTRAN_REMOVE) @echo "===> REMOVING filter, groups and default configs from $(INSTALLED_PREFIX)/share/likwid" @@ -591,6 +654,8 @@ local: $(L_APPS) likwid.lua @ln -sf $(HWLOC_FOLDER)/liblikwid-hwloc.so liblikwid-hwloc.so.$(VERSION).$(RELEASE) @ln -sf $(LUA_FOLDER)/liblikwid-lua.so liblikwid-lua.so.$(VERSION) @ln -sf $(LUA_FOLDER)/liblikwid-lua.so liblikwid-lua.so.$(VERSION).$(RELEASE) + @ln -sf $(GOTCHA_FOLDER)/liblikwid-gotcha.so liblikwid-gotcha.so.$(VERSION) + @ln -sf $(GOTCHA_FOLDER)/liblikwid-gotcha.so liblikwid-gotcha.so.$(VERSION).$(RELEASE) @if [ -e $(LUA_FOLDER)/liblikwid-lua.so ]; then ln -sf 
$(LUA_FOLDER)/liblikwid-lua.so liblikwid-lua.so.$(VERSION).$(RELEASE); fi @if [ -e $(HWLOC_FOLDER)/liblikwid-hwloc.so ]; then ln -sf $(HWLOC_FOLDER)/liblikwid-hwloc.so liblikwid-hwloc.so.$(VERSION).$(RELEASE); fi @if [ -e $(PINLIB) ]; then ln -sf $(PINLIB) $(PINLIB).$(VERSION).$(RELEASE); fi diff --git a/README.md b/README.md index f854377ff..196463d91 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,9 @@ Introduction -------------------------------------------------------------------------------- Likwid is a simple to install and use toolsuite of command line applications -for performance oriented programmers. It works for Intel, AMD and ARMv8 processors -on the Linux operating system. +for performance oriented programmers. It works for Intel, AMD, ARMv8 and POWER9 +processors on the Linux operating system. There is support for ARMv7 and POWER8 +but there is currently no test machine in our hands to test them properly. [![Build Status](https://travis-ci.org/RRZE-HPC/likwid.svg?branch=master)](https://travis-ci.org/RRZE-HPC/likwid) @@ -61,9 +62,16 @@ AMD - AMD Interlagos - AMD Kabini - AMD Zen +- AMD Zen2 -ARMv8 (experimental) -- Tested on Marvell Thunder X2 +ARM (experimental) +- ARMv7 +- ARMv8 +- Special support for Marvell Thunder X2 + +POWER (experimental) +- IBM POWER8 +- IBM POWER9 -------------------------------------------------------------------------------- Download, Build and Install @@ -105,7 +113,7 @@ https://github.com/rrze-likwid/likwid/issues Extras -------------------------------------------------------------------------------- - If you want to use the Marker API with Java, you can find the Java module here: -https://github.com/jlewandowski/likwid-java-api +https://github.com/jacek-lewandowski/likwid-java-api - For Python you can find an interface to the LIKWID API here: https://github.com/RRZE-HPC/pylikwid or `pip install pylikwid` diff --git a/bench/Makefile b/bench/Makefile index d2acd57b8..78100eb20 100644 --- a/bench/Makefile +++ 
b/bench/Makefile @@ -63,6 +63,9 @@ endif ifeq ($(COMPILER),GCCARMv8) BENCH_DIR = ./armv8 endif +ifeq ($(COMPILER),GCCPOWER) +BENCH_DIR = ./power +endif diff --git a/bench/armv8/daxpy.ptt b/bench/armv8/daxpy.ptt new file mode 100644 index 000000000..945adbddb --- /dev/null +++ b/bench/armv8/daxpy.ptt @@ -0,0 +1,15 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 2 +BYTES 24 +DESC Double-precision linear combination of two vectors, only scalar operations +LOADS 2 +STORES 1 +INSTR_LOOP 4 +fmov FPR7, ARG1 +LOOP 1 +ldr FPR1, [STR0], #8 +fmul FPR1, FPR1, FPR7 +ldr FPR2, [STR1] +fadd FPR1, FPR1, FPR2 +str FPR1, [STR1], #8 diff --git a/bench/armv8/triad.ptt b/bench/armv8/triad.ptt new file mode 100644 index 000000000..94572be3c --- /dev/null +++ b/bench/armv8/triad.ptt @@ -0,0 +1,16 @@ +STREAMS 4 +TYPE DOUBLE +FLOPS 2 +BYTES 32 +DESC Double-precision triad A(i) = B(i) * C(i) + D(i), only scalar operations +LOADS 3 +STORES 1 +INSTR_LOOP 6 +LOOP 1 +ldr D1, [STR1], #8 +ldr D2, [STR2], #8 +ldr D3, [STR3], #8 +fmul D1, D1, D2 +fadd D1, D1, D3 +str D1, [STR0], #8 + diff --git a/bench/includes/allocator.h b/bench/includes/allocator.h index bb1da2360..658693d25 100644 --- a/bench/includes/allocator.h +++ b/bench/includes/allocator.h @@ -45,6 +45,7 @@ extern void allocator_allocateVector(void** ptr, int offset, DataType type, int stride, - bstring domain); + bstring domain, + int init_per_thread); #endif /*ALLOCATOR_H*/ diff --git a/bench/includes/bstrlib_helper.h b/bench/includes/bstrlib_helper.h new file mode 120000 index 000000000..2536c9b83 --- /dev/null +++ b/bench/includes/bstrlib_helper.h @@ -0,0 +1 @@ +../../src/includes/bstrlib_helper.h \ No newline at end of file diff --git a/bench/includes/isa_armv7.h b/bench/includes/isa_armv7.h new file mode 100644 index 000000000..e3538d23b --- /dev/null +++ b/bench/includes/isa_armv7.h @@ -0,0 +1,214 @@ +/* + * ======================================================================================= + * Filename: isa_armv7.h + * + * Description: 
Definitions used to dynamically compile benchmarks for ARMv7 systems + * + * Version: + * Released: + * + * Author: Thomas Gruber (tg), thomas.roehl@gmail.com + * Project: likwid + * + * Copyright (C) 2019 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * ======================================================================================= + */ +#ifndef LIKWID_BENCH_ISA_ARMV7_H +#define LIKWID_BENCH_ISA_ARMV7_H + +#include +#include + +#define ARCHNAME "armv7" +#define WORDLENGTH 4 + +int header(struct bstrList* code, char* funcname) +{ + bstring glline; + bstring typeline; + bstring label; + if (funcname) + { + glline = bformat(".global %s", funcname); + typeline = bformat(".type %s, %%function", funcname); /* "%%" yields the literal '%' of the ARM type tag; "\%" left a stray %f conversion */ + label = bformat("%s :", funcname); + } + else + { + glline = bformat(".global kernelfunction"); + typeline = bformat(".type kernelfunction, %%function"); /* escaped for the same reason as above */ + label = bformat("kernelfunction :"); + } + + + bstrListAddChar(code, ".cpu cortex-a15\n.fpu neon-vfpv4"); + bstrListAddChar(code, ".data"); + bstrListAddChar(code, ".text"); + bstrListAdd(code, glline); + bstrListAdd(code, typeline); + bstrListAdd(code, label); + bstrListAddChar(code, "push {r4-r7, lr}"); + bstrListAddChar(code, "add r7, sp, #12"); + bstrListAddChar(code, "push {r8, r10, r11}"); + bstrListAddChar(code, "vstmdb sp!, {d8-d15}"); + + + + bstrListAddChar(code, "\n"); + + 
bdestroy(glline); + bdestroy(typeline); + bdestroy(label); + return 0; +} + +int footer(struct bstrList* code, char* funcname) +{ + bstring line; + if (funcname) + { + line = bformat(".size %s, .-%s", funcname, funcname); + } + else + { + line = bformat(".size kernelfunction, .-kernelfunction"); + } + bstrListAddChar(code, "vldmia sp!, {d8-d15}"); + bstrListAddChar(code, "pop {r8, r10, r11}"); + bstrListAddChar(code, "pop {r4-r7, pc}"); + + bstrListAdd(code, line); + + bstrListAddChar(code, "\n"); + + bstrListAddChar(code, "#if defined(__linux__) && defined(__ELF__)"); + bstrListAddChar(code, ".section .note.GNU-stack,\"\",%progbits"); + bstrListAddChar(code, "#endif"); + bdestroy(line); + return 0; /* footer() is declared int: report success like loopheader()/loopfooter() */ +} + +int loopheader(struct bstrList* code, char* loopname, int step) +{ + bstring line; + if (loopname) + { + line = bformat("%s:", loopname); + } + else + { + line = bformat("kernelfunctionloop:"); + } + + bstrListAddChar(code, "mov GPR4, #0"); + bstrListAddChar(code, ".align 2"); + bstrListAdd(code, line); + bstrListAddChar(code, "\n"); + + bdestroy(line); + return 0; +} + +int loopfooter(struct bstrList* code, char* loopname, int step) +{ + bstring line; + if (loopname) + { + line = bformat("blt %sb", loopname); + } + else + { + line = bformat("blt kernelfunctionloop"); /* must match the default label "kernelfunctionloop:" emitted by loopheader() */ + } + bstring bstep = bformat("add GPR4, #%d", step); + bstrListAdd(code, bstep); + bdestroy(bstep); + bstrListAddChar(code, "cmp GPR4, GPR1"); + bstrListAdd(code, line); + + bstrListAddChar(code, "\n"); + + bdestroy(line); + return 0; +} + + +static RegisterMap Registers[] = { + {"GPR1", "r0"}, + {"GPR2", "r1"}, + {"GPR3", "r2"}, + {"GPR4", "r3"}, + {"GPR5", "r4"}, + {"GPR6", "r5"}, + {"GPR7", "r6"}, + {"GPR8", "r7"}, + {"GPR9", "r8"}, + {"GPR10", "r9"}, + {"GPR11", "r10"}, + {"GPR12", "r11"}, + {"GPR13", "r12"}, + {"GPR14", "r13"}, + {"GPR15", "r14"}, + {"GPR16", "r15"}, + {"FPR1", "d0"}, + {"FPR2", "d1"}, + {"FPR3", "d2"}, + {"FPR4", "d3"}, + {"FPR5", "d4"}, + {"FPR6", "d5"}, + {"FPR7", "d6"}, + {"FPR8", 
"d7"}, + {"FPR9", "d8"}, + {"FPR10", "d9"}, + {"FPR11", "d10"}, + {"FPR12", "d11"}, + {"FPR13", "d12"}, + {"FPR14", "d13"}, + {"FPR15", "d14"}, + {"FPR16", "d15"}, + {"", ""}, +}; + +static RegisterMap Arguments[] = { + {"ARG1", "r0"}, + {"ARG2", "r1"}, + {"ARG3", "r2"}, + {"ARG4", "r3"}, + {"ARG7", "[SPTR+8]"}, + {"ARG8", "[SPTR+12]"}, + {"ARG9", "[SPTR+16]"}, + {"ARG10", "[SPTR+20]"}, + {"ARG11", "[SPTR+24]"}, + {"ARG12", "[SPTR+28]"}, + {"ARG13", "[SPTR+32]"}, + {"ARG14", "[SPTR+36]"}, + {"ARG15", "[SPTR+40]"}, + {"ARG16", "[SPTR+44]"}, + {"ARG17", "[SPTR+48]"}, + {"ARG18", "[SPTR+52]"}, + {"ARG19", "[SPTR+56]"}, + {"ARG20", "[SPTR+60]"}, + {"ARG21", "[SPTR+64]"}, + {"ARG22", "[SPTR+68]"}, + {"ARG23", "[SPTR+72]"}, + {"ARG24", "[SPTR+76]"}, + {"", ""}, +}; + +static RegisterMap Sptr = {"SPTR", "sp"}; +static RegisterMap Bptr = {"BPTR", "rbp"}; + +#endif diff --git a/bench/includes/isa_armv8.h b/bench/includes/isa_armv8.h new file mode 100644 index 000000000..e1e14411e --- /dev/null +++ b/bench/includes/isa_armv8.h @@ -0,0 +1,214 @@ +/* + * ======================================================================================= + * Filename: isa_armv8.h + * + * Description: Definitions used for dynamically compile benchmarks for ARMv8 systems + * + * Version: + * Released: + * + * Author: Thomas Gruber (tg), thomas.roehl@gmail.com + * Project: likwid + * + * Copyright (C) 2019 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * ======================================================================================= + */ +#ifndef LIKWID_BENCH_ISA_ARMV8_H +#define LIKWID_BENCH_ISA_ARMV8_H + +#include +#include + +#define ARCHNAME "armv8" +#define WORDLENGTH 4 + +int header(struct bstrList* code, char* funcname) +{ + bstring glline; + bstring typeline; + bstring label; + if (funcname) + { + glline = bformat(".global %s", funcname); + typeline = bformat(".type %s, @function", funcname); + label = bformat("%s :", funcname); + } + else + { + glline = bformat(".global kernelfunction"); + typeline = bformat(".type kernelfunction, @function"); + label = bformat("kernelfunction :"); + } + + + bstrListAddChar(code, ".cpu generic+fp+simd"); + bstrListAddChar(code, ".data"); + bstrListAddChar(code, ".text"); + bstrListAdd(code, glline); + bstrListAdd(code, typeline); + bstrListAdd(code, label); + + bstrListAddChar(code, "\n"); + + bdestroy(glline); + bdestroy(typeline); + bdestroy(label); + return 0; +} + +int footer(struct bstrList* code, char* funcname) +{ + bstring line; + if (funcname) + { + line = bformat(".size %s, .-%s", funcname, funcname); + } + else + { + line = bformat(".size kernelfunction, .-kernelfunction"); + } + bstrListAddChar(code, ".exit:"); + bstrListAddChar(code, "ret"); + + bstrListAdd(code, line); + + bstrListAddChar(code, "\n"); + + bstrListAddChar(code, "#if defined(__linux__) && defined(__ELF__)"); + bstrListAddChar(code, ".section .note.GNU-stack,\"\",%progbits"); + bstrListAddChar(code, "#endif"); + bdestroy(line); + return 0; /* footer() is declared int: report success like header()/loopheader() */ +} + +int loopheader(struct bstrList* code, char* loopname, int step) +{ + bstring line; + if (loopname) + { + line = bformat("%s:", loopname); + } + else + { + line = bformat("kernelfunctionloop:"); + } + + bstrListAddChar(code, "mov GPR6, 0"); + bstrListAdd(code, line); + bstrListAddChar(code, "\n"); + + bdestroy(line); + return 0; +} + 
+int loopfooter(struct bstrList* code, char* loopname, int step) +{ + bstring line; + if (loopname) + { + line = bformat("blt %s", loopname); /* "tblt" is not an AArch64 mnemonic; the signed less-than branch is blt (b.lt) */ + } + else + { + line = bformat("blt kernelfunctionloop"); /* branches back to the default label emitted by loopheader() */ + } + bstring bstep = bformat("add GPR6, GPR6, #%d", step); + bstrListAdd(code, bstep); + bdestroy(bstep); + bstrListAddChar(code, "cmp GPR6, ARG1"); + bstrListAdd(code, line); + + bstrListAddChar(code, "\n"); + + bdestroy(line); + return 0; +} + + +static RegisterMap Registers[] = { + {"GPR1", "x1"}, + {"GPR2", "x2"}, + {"GPR3", "x3"}, + {"GPR4", "x4"}, + {"GPR5", "x5"}, + {"GPR6", "x6"}, + {"GPR7", "x7"}, + {"GPR8", "x8"}, + {"GPR9", "x9"}, + {"GPR10", "x10"}, + {"GPR11", "x11"}, + {"GPR12", "x12"}, + {"GPR13", "x13"}, + {"GPR14", "x14"}, + {"GPR15", "x15"}, + {"GPR16", "x16"}, + {"GPR17", "x17"}, + {"GPR18", "x18"}, + {"GPR19", "x19"}, + {"GPR20", "x20"}, + {"GPR21", "x21"}, + {"GPR22", "x22"}, + {"FPR1", "d0"}, + {"FPR2", "d1"}, + {"FPR3", "d2"}, + {"FPR4", "d3"}, + {"FPR5", "d4"}, + {"FPR6", "d5"}, + {"FPR7", "d6"}, + {"FPR8", "d7"}, + {"FPR9", "d8"}, + {"FPR10", "d9"}, + {"FPR11", "d10"}, + {"FPR12", "d11"}, + {"FPR13", "d12"}, + {"FPR14", "d13"}, + {"FPR15", "d14"}, + {"FPR16", "d15"}, + {"", ""}, +}; + +static RegisterMap Arguments[] = { + {"ARG1", "x0"}, + {"ARG2", "x1"}, + {"ARG3", "x2"}, + {"ARG4", "x3"}, + {"ARG5", "x4"}, + {"ARG6", "x5"}, + {"ARG7", "x6"}, + {"ARG8", "x7"}, + {"ARG9", "[SPTR+32]"}, + {"ARG10", "[SPTR+40]"}, + {"ARG11", "[SPTR+48]"}, + {"ARG12", "[SPTR+56]"}, + {"ARG13", "[SPTR+64]"}, + {"ARG14", "[SPTR+72]"}, + {"ARG15", "[SPTR+80]"}, + {"ARG16", "[SPTR+88]"}, + {"ARG17", "[SPTR+96]"}, + {"ARG18", "[SPTR+104]"}, + {"ARG19", "[SPTR+112]"}, + {"ARG20", "[SPTR+120]"}, + {"ARG21", "[SPTR+128]"}, + {"ARG22", "[SPTR+136]"}, + {"ARG23", "[SPTR+144]"}, + {"ARG24", "[SPTR+152]"}, + {"", ""}, +}; + +static RegisterMap Sptr = {"SPTR", "sp"}; +static RegisterMap Bptr = {"BPTR", "rbp"}; + +#endif diff --git a/bench/includes/isa_x86-64.h 
b/bench/includes/isa_x86-64.h new file mode 100644 index 000000000..2138268ca --- /dev/null +++ b/bench/includes/isa_x86-64.h @@ -0,0 +1,226 @@ +/* + * ======================================================================================= + * Filename: isa_x86-64.h + * + * Description: Definitions used for dynamically compile benchmarks for x86-64 systems + * + * Version: + * Released: + * + * Author: Thomas Gruber (tg), thomas.roehl@gmail.com + * Project: likwid + * + * Copyright (C) 2019 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ * + * ======================================================================================= + */ +#ifndef LIKWID_BENCH_ISA_X8664_H +#define LIKWID_BENCH_ISA_X8664_H + +#include +#include + +#define ARCHNAME "x86-64" + + +int header(struct bstrList* code, char* funcname) +{ + bstring glline; + bstring typeline; + bstring label; + if (funcname) + { + glline = bformat(".global %s", funcname); + typeline = bformat(".type %s, @function", funcname); + label = bformat("%s :", funcname); + } + else + { + glline = bformat(".global kernelfunction"); + typeline = bformat(".type kernelfunction, @function"); + label = bformat("kernelfunction :"); + } + + + bstrListAddChar(code, ".intel_syntax noprefix"); + bstrListAddChar(code, ".data"); + bstrListAddChar(code, ".align 64\nSCALAR:\n.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0"); + bstrListAddChar(code, ".align 64\nSSCALAR:\n.single 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0"); + bstrListAddChar(code, ".align 64\nISCALAR:\n.int 1, 1, 1, 1, 1, 1, 1, 1"); + bstrListAddChar(code, ".align 16\nOMM:\n.int 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15"); + bstrListAddChar(code, ".align 16\nIOMM:\n.int 0,16,32,48,64,80,96,128,144,160,176,192,208,224,240,256"); + bstrListAddChar(code, ".align 16\nTOMM:\n.int 0,2,4,6,16,18,20,22,32,34,36,38,48,50,52,54"); + bstrListAddChar(code, ".text"); + bstrListAdd(code, glline); + bstrListAdd(code, typeline); + bstrListAdd(code, label); + bstrListAddChar(code, "push rbp"); + bstrListAddChar(code, "mov rbp, rsp"); + bstrListAddChar(code, "push rbx"); + bstrListAddChar(code, "push r12"); + bstrListAddChar(code, "push r13"); + bstrListAddChar(code, "push r14"); + bstrListAddChar(code, "push r15"); + + bstrListAddChar(code, "\n"); + + bdestroy(glline); + bdestroy(typeline); + bdestroy(label); + return 0; +} + +int footer(struct bstrList* code, char* funcname) +{ + bstring line; + if (funcname) + { + line = bformat(".size %s, .-%s", funcname, funcname); + } + else + { + line = bformat(".size kernelfunction, 
.-kernelfunction"); + } + bstrListAddChar(code, "pop r15"); + bstrListAddChar(code, "pop r14"); + bstrListAddChar(code, "pop r13"); + bstrListAddChar(code, "pop r12"); + bstrListAddChar(code, "pop rbx"); + bstrListAddChar(code, "mov rsp, rbp"); + bstrListAddChar(code, "pop rbp"); + bstrListAddChar(code, "ret"); + + bstrListAdd(code, line); + + bstrListAddChar(code, "\n"); + + bstrListAddChar(code, "#if defined(__linux__) && defined(__ELF__)"); + bstrListAddChar(code, ".section .note.GNU-stack,\"\",%progbits"); + bstrListAddChar(code, "#endif"); + bdestroy(line); + return 0; /* footer() is declared int: report success like loopheader()/loopfooter() */ +} + +int loopheader(struct bstrList* code, char* loopname, int step) +{ + bstring line; + if (loopname) + { + line = bformat("%s:", loopname); + } + else + { + line = bformat("kernelfunctionloop:"); + } + + bstrListAddChar(code, "xor GPR1, GPR1"); + bstrListAddChar(code, ".align 16"); + bstrListAdd(code, line); + bstrListAddChar(code, "\n"); + + bdestroy(line); + return 0; +} + +int loopfooter(struct bstrList* code, char* loopname, int step) +{ + bstring line; + if (loopname) + { + line = bformat("jl %sb", loopname); + } + else + { + line = bformat("jl kernelfunctionloop"); /* must match the default label "kernelfunctionloop:" emitted by loopheader() */ + } + bstring bstep = bformat("add GPR1, %d", step); + bstrListAdd(code, bstep); + bdestroy(bstep); + bstrListAddChar(code, "cmp GPR1, ARG1"); + bstrListAdd(code, line); + + bstrListAddChar(code, "\n"); + + bdestroy(line); + return 0; +} + + +static RegisterMap Registers[] = { + {"GPR1", "rax"}, + {"GPR2", "rbx"}, + {"GPR3", "rcx"}, + {"GPR4", "rdx"}, + {"GPR5", "rsi"}, + {"GPR6", "rdi"}, + {"GPR7", "r8"}, + {"GPR8", "r9"}, + {"GPR9", "r10"}, + {"GPR10", "r11"}, + {"GPR11", "r12"}, + {"GPR12", "r13"}, + {"GPR13", "r14"}, + {"GPR14", "r15"}, + {"FPR1", "xmm0"}, + {"FPR2", "xmm1"}, + {"FPR3", "xmm2"}, + {"FPR4", "xmm3"}, + {"FPR5", "xmm4"}, + {"FPR6", "xmm5"}, + {"FPR7", "xmm6"}, + {"FPR8", "xmm7"}, + {"FPR9", "xmm8"}, + {"FPR10", "xmm9"}, + {"FPR11", "xmm10"}, + {"FPR12", "xmm11"}, + {"FPR13", "xmm12"}, + {"FPR14", "xmm13"}, + 
{"FPR15", "xmm14"}, + {"FPR16", "xmm15"}, + {"", ""}, +}; + +static RegisterMap Arguments[] = { + {"ARG1", "rdi"}, + {"ARG2", "rsi"}, + {"ARG3", "rdx"}, + {"ARG4", "rcx"}, + {"ARG5", "r8"}, + {"ARG6", "r9"}, + {"ARG7", "[BPTR+16]"}, + {"ARG8", "[BPTR+24]"}, + {"ARG9", "[BPTR+32]"}, + {"ARG10", "[BPTR+40]"}, + {"ARG11", "[BPTR+48]"}, + {"ARG12", "[BPTR+56]"}, + {"ARG13", "[BPTR+64]"}, + {"ARG14", "[BPTR+72]"}, + {"ARG15", "[BPTR+80]"}, + {"ARG16", "[BPTR+88]"}, + {"ARG17", "[BPTR+96]"}, + {"ARG18", "[BPTR+104]"}, + {"ARG19", "[BPTR+112]"}, + {"ARG20", "[BPTR+120]"}, + {"ARG21", "[BPTR+128]"}, + {"ARG22", "[BPTR+136]"}, + {"ARG23", "[BPTR+144]"}, + {"ARG24", "[BPTR+152]"}, + {"", ""}, +}; + +static RegisterMap Sptr = {"SPTR", "rsp"}; +static RegisterMap Bptr = {"BPTR", "rbp"}; + +#endif diff --git a/bench/includes/isa_x86.h b/bench/includes/isa_x86.h new file mode 100644 index 000000000..21350fcc3 --- /dev/null +++ b/bench/includes/isa_x86.h @@ -0,0 +1,204 @@ +/* + * ======================================================================================= + * Filename: isa_x86.h + * + * Description: Definitions used for dynamically compile benchmarks for x86 systems + * + * Version: + * Released: + * + * Author: Thomas Gruber (tg), thomas.roehl@gmail.com + * Project: likwid + * + * Copyright (C) 2019 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ * + * ======================================================================================= + */ +#ifndef LIKWID_BENCH_ISA_X86_H +#define LIKWID_BENCH_ISA_X86_H + + +#include +#include + +#define ARCHNAME "x86" +#define WORDLENGTH 4 + +/* Append the assembly preamble to code: Intel-syntax switch, constant data, symbol directives and the frame setup (push ebp, mov ebp, esp, push ebx/esi/edi) that footer() undoes. A NULL funcname selects "kernelfunction". Always returns 0. */ +int header(struct bstrList* code, char* funcname) +{ + bstring glline; + bstring typeline; + bstring label; + if (funcname) + { + glline = bformat(".global %s", funcname); + typeline = bformat(".type %s, @function", funcname); + label = bformat("%s :", funcname); + } + else + { + glline = bformat(".global kernelfunction"); + typeline = bformat(".type kernelfunction, @function"); + label = bformat("kernelfunction :"); + } + bstrListAddChar(code, ".intel_syntax noprefix"); + bstrListAddChar(code, ".data"); + bstrListAddChar(code, ".align 64\nSCALAR:\n.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0"); + bstrListAddChar(code, ".align 64\nSSCALAR:\n.single 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0"); + bstrListAddChar(code, ".align 64\nISCALAR:\n.int 1, 1, 1, 1, 1, 1, 1, 1"); + bstrListAddChar(code, ".align 16\nOMM:\n.int 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15"); + bstrListAddChar(code, ".align 16\nIOMM:\n.int 0,16,32,48,64,80,96,128,144,160,176,192,208,224,240,256"); /* NOTE(review): sequence skips 112 and ends at 256 - confirm this is intended */ + bstrListAddChar(code, ".align 16\nTOMM:\n.int 0,2,4,6,16,18,20,22,32,34,36,38,48,50,52,54"); + bstrListAddChar(code, ".text"); + bstrListAdd(code, glline); + bstrListAdd(code, typeline); + bstrListAdd(code, label); + bstrListAddChar(code, "push ebp"); + bstrListAddChar(code, "mov ebp, esp"); + bstrListAddChar(code, "push ebx"); + bstrListAddChar(code, "push esi"); + bstrListAddChar(code, "push edi"); + bstrListAddChar(code, "\n"); + bdestroy(glline); + bdestroy(typeline); + bdestroy(label); + return 0; +} +/* Append the epilogue matching header(): pop edi/esi/ebx, restore esp and ebp, ret, the .size directive and the GNU-stack note. A NULL funcname selects "kernelfunction". Always returns 0. */ +int footer(struct bstrList* code, char* funcname) +{ + bstring line; + if (funcname) + { + line = bformat(".size %s, .-%s", funcname, funcname); + } + else + { + line = bformat(".size kernelfunction, .-kernelfunction"); + } + bstrListAddChar(code, "pop edi"); + bstrListAddChar(code, "pop esi"); + bstrListAddChar(code, "pop ebx"); + bstrListAddChar(code, "mov esp, ebp"); + bstrListAddChar(code, "pop ebp"); + bstrListAddChar(code, "ret"); + + 
bstrListAdd(code, line); + + bstrListAddChar(code, "\n"); + + bstrListAddChar(code, "#if defined(__linux__) && defined(__ELF__)"); + bstrListAddChar(code, ".section .note.GNU-stack,\"\",%progbits"); + bstrListAddChar(code, "#endif"); + bdestroy(line); + return 0; /* the function is declared int; falling off the end left the result undefined */ +} +/* Append the loop head: zero the counter GPR1, align, then emit the loop label ("kernelfunctionloop" if loopname is NULL). step is not used here. Always returns 0. */ +int loopheader(struct bstrList* code, char* loopname, int step) +{ + bstring line; + if (loopname) + { + line = bformat("%s:", loopname); + } + else + { + line = bformat("kernelfunctionloop:"); + } + + bstrListAddChar(code, "xor GPR1, GPR1"); + bstrListAddChar(code, ".align 16"); + bstrListAdd(code, line); + bstrListAddChar(code, "\n"); + + bdestroy(line); + return 0; +} +/* Append the loop tail: add step to GPR1, compare with ARG1 and jump back while less. A given loopname gets a 'b' suffix, i.e. it is used as a numeric local label such as "1"; the NULL default jumps to the named label loopheader() emits. Always returns 0. */ +int loopfooter(struct bstrList* code, char* loopname, int step) +{ + bstring line; + if (loopname) + { + line = bformat("jl %sb", loopname); + } + else + { + line = bformat("jl kernelfunctionloop"); + } + bstring bstep = bformat("add GPR1, %d", step); + bstrListAdd(code, bstep); + bdestroy(bstep); + bstrListAddChar(code, "cmp GPR1, ARG1"); + bstrListAdd(code, line); + + bstrListAddChar(code, "\n"); + + bdestroy(line); + return 0; +} + +/* GPRn/FPRn placeholders and the 32-bit x86 registers they stand for; the list ends with an empty entry. */ +static RegisterMap Registers[] = { + {"GPR1", "eax"}, + {"GPR2", "ebx"}, + {"GPR3", "ecx"}, + {"GPR4", "edx"}, + {"GPR5", "esi"}, + {"GPR6", "edi"}, + {"FPR1", "xmm0"}, + {"FPR2", "xmm1"}, + {"FPR3", "xmm2"}, + {"FPR4", "xmm3"}, + {"FPR5", "xmm4"}, + {"FPR6", "xmm5"}, + {"FPR7", "xmm6"}, + {"FPR8", "xmm7"}, + {"", ""}, +}; +/* ARGn placeholders for the i386 cdecl convention: every argument is on the stack, ARG1 at [BPTR+8] (above the saved ebp and the return address), 4 bytes apart. 64-bit register names cannot be assembled in 32-bit code. */ +static RegisterMap Arguments[] = { + {"ARG1", "[BPTR+8]"}, + {"ARG2", "[BPTR+12]"}, + {"ARG3", "[BPTR+16]"}, + {"ARG4", "[BPTR+20]"}, + {"ARG5", "[BPTR+24]"}, + {"ARG6", "[BPTR+28]"}, + {"ARG7", "[BPTR+32]"}, + {"ARG8", "[BPTR+36]"}, + {"ARG9", "[BPTR+40]"}, + {"ARG10", "[BPTR+44]"}, + {"ARG11", "[BPTR+48]"}, + {"ARG12", "[BPTR+52]"}, + {"ARG13", "[BPTR+56]"}, + {"ARG14", "[BPTR+60]"}, + {"ARG15", "[BPTR+64]"}, + {"ARG16", "[BPTR+68]"}, + {"ARG17", "[BPTR+72]"}, + {"ARG18", "[BPTR+76]"}, + {"ARG19", "[BPTR+80]"}, + {"ARG20", "[BPTR+84]"}, + {"ARG21", "[BPTR+88]"}, + {"ARG22", "[BPTR+92]"}, + {"ARG23", "[BPTR+96]"}, + 
{"ARG24", "[BPTR+100]"}, + {"", ""}, +}; +/* Stack pointer and base pointer placeholders. */ +static RegisterMap Sptr = {"SPTR", "esp"}; +static RegisterMap Bptr = {"BPTR", "ebp"}; + +#endif diff --git a/bench/includes/ptt2asm.h b/bench/includes/ptt2asm.h new file mode 100644 index 000000000..6876f31aa --- /dev/null +++ b/bench/includes/ptt2asm.h @@ -0,0 +1,88 @@ +/* + * ======================================================================================= + * Filename: ptt2asm.h + * + * Description: The interface to dynamically load ptt files + * + * Version: + * Released: + * + * Author: Thomas Gruber (tg), thomas.roehl@gmail.com + * Project: likwid + * + * Copyright (C) 2019 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ * + * ======================================================================================= + */ +#ifndef LIKWID_BENCH_PTT2ASM_H +#define LIKWID_BENCH_PTT2ASM_H +/* Pairs a placeholder name (pattern) with the text it stands for (reg); presumably applied as a textual substitution when a ptt file is translated - confirm in the implementation. */ +typedef struct { + char* pattern; + char* reg; +} RegisterMap; +/* Stream placeholders STR0..STR40: the first five map to ARG2..ARG6, the rest to literal [rbp+N] slots 8 bytes apart starting at +16; the list ends with an empty entry. NOTE(review): the rbp entries name an x86-64 register - confirm how other ISAs are meant to use this table. */ +static RegisterMap StreamPatterns[] = { + {"STR0", "ARG2"}, + {"STR1", "ARG3"}, + {"STR2", "ARG4"}, + {"STR3", "ARG5"}, + {"STR4", "ARG6"}, + {"STR5", "[rbp+16]"}, + {"STR6", "[rbp+24]"}, + {"STR7", "[rbp+32]"}, + {"STR8", "[rbp+40]"}, + {"STR9", "[rbp+48]"}, + {"STR10", "[rbp+56]"}, + {"STR11", "[rbp+64]"}, + {"STR12", "[rbp+72]"}, + {"STR13", "[rbp+80]"}, + {"STR14", "[rbp+88]"}, + {"STR15", "[rbp+96]"}, + {"STR16", "[rbp+104]"}, + {"STR17", "[rbp+112]"}, + {"STR18", "[rbp+120]"}, + {"STR19", "[rbp+128]"}, + {"STR20", "[rbp+136]"}, + {"STR21", "[rbp+144]"}, + {"STR22", "[rbp+152]"}, + {"STR23", "[rbp+160]"}, + {"STR24", "[rbp+168]"}, + {"STR25", "[rbp+176]"}, + {"STR26", "[rbp+184]"}, + {"STR27", "[rbp+192]"}, + {"STR28", "[rbp+200]"}, + {"STR29", "[rbp+208]"}, + {"STR30", "[rbp+216]"}, + {"STR31", "[rbp+224]"}, + {"STR32", "[rbp+232]"}, + {"STR33", "[rbp+240]"}, + {"STR34", "[rbp+248]"}, + {"STR35", "[rbp+256]"}, + {"STR36", "[rbp+264]"}, + {"STR37", "[rbp+272]"}, + {"STR38", "[rbp+280]"}, + {"STR39", "[rbp+288]"}, + {"STR40", "[rbp+296]"}, + {"", ""}, +}; +/* Interface for dynamically loaded benchmarks; the definitions live outside this header. */ +struct bstrList* dynbench_getall(); + +int dynbench_test(bstring testname); +int dynbench_load(bstring testname, TestCase **testcase, char* tmpfolder, char *compilers, char* compileflags); +int dynbench_close(TestCase* testcase, char* tmpfolder); + +#endif diff --git a/bench/includes/strUtil.h b/bench/includes/strUtil.h index 66722373d..2b197c11c 100644 --- a/bench/includes/strUtil.h +++ b/bench/includes/strUtil.h @@ -50,6 +50,7 @@ typedef struct { uint32_t numberOfThreads; int* processorIds; uint64_t size; + int init_per_thread; Stream* streams; } Workgroup; diff --git a/bench/includes/test_types.h b/bench/includes/test_types.h index b4080d1ef..652a7b6b3 100644 --- 
a/bench/includes/test_types.h +++ b/bench/includes/test_types.h @@ -41,6 +41,7 @@ typedef enum { INT} DataType; typedef enum { + STREAM_0 = 0, STREAM_1 = 1, STREAM_2, STREAM_3, @@ -96,6 +97,8 @@ typedef struct { int instr_const; int instr_loop; int uops; + int loadstores; + void* dlhandle; } TestCase; typedef struct { @@ -105,6 +108,7 @@ typedef struct { const TestCase* test; uint64_t cycles; uint32_t numberOfThreads; + int init_per_thread; int* processors; void** streams; } ThreadUserData; diff --git a/bench/includes/threads.h b/bench/includes/threads.h index f0953b589..94d964fa1 100644 --- a/bench/includes/threads.h +++ b/bench/includes/threads.h @@ -32,6 +32,7 @@ #include #include +#include #define THREADS_BARRIER pthread_barrier_wait(&threads_barrier) #define MIN_ITERATIONS 10 @@ -107,7 +108,8 @@ extern void threads_destroy(int numberOfGroups, int numberOfStreams); /** * @brief Create Thread groups * @param numberOfGroups The number of groups to create + * @param groups Pointer to the groups data */ -extern void threads_createGroups(int numberOfGroups); +extern void threads_createGroups(int numberOfGroups, Workgroup *groups); #endif /* THREADS_H */ diff --git a/bench/likwid-bench.c b/bench/likwid-bench.c index 0268ed367..cbdf087c3 100644 --- a/bench/likwid-bench.c +++ b/bench/likwid-bench.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -47,8 +48,10 @@ #include #include #include +#include #include +#include #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) @@ -72,7 +75,14 @@ extern void* getIterSingle(void* arg); printf("-l \t list properties of benchmark \n"); \ printf("-t \t type of test \n"); \ printf("-w\t\t :[:[::]-:[:]\n"); \ - printf("\t\t in kB, MB or GB (mandatory)\n"); \ + printf("-W\t\t :[:[::]]\n"); \ + printf("\t\t in kB, MB or GB (mandatory)\n"); \ + printf("For dynamically loaded benchmarks\n"); \ + printf("-f \t Specify a folder for the temporary files. 
default: /tmp\n"); \ + printf("\n"); \ + printf("Difference between -w and -W :\n"); \ + printf("-w allocates the streams in the thread_domain with one thread and support placement of streams\n"); \ + printf("-W allocates the streams chunk-wise by each thread in the thread_domain\n"); \ printf("\n"); \ printf("Usage: \n"); \ printf("# Run the store benchmark on all CPUs of the system with a vector size of 1 GB\n"); \ @@ -81,6 +91,8 @@ extern void* getIterSingle(void* arg); printf("likwid-bench -t copy -w S0:100kB:1\n"); \ printf("# Run the copy benchmark on one CPU at CPU socket 0 with a vector size of 100MB but place one stream on CPU socket 1\n"); \ printf("likwid-bench -t copy -w S0:100MB:1-0:S0,1:S1\n"); \ +/* printf("-c \t Specify a list of compilers that should be searched for. default: gcc,icc,pgcc\n"); \*/ +/* printf("-f \t Specify compiler flags. Use \". default: \"-shared -fPIC\"\n"); \*/ #define VERSION_MSG \ printf("likwid-bench -- Version %d.%d.%d\n",VERSION,RELEASE,MINORVERSION); \ @@ -108,6 +120,13 @@ copyThreadData(ThreadUserData* src,ThreadUserData* dst) } +void illhandler(int signum, siginfo_t *info, void *ptr) +{ + fprintf(stderr, "ERROR: Illegal instruction\n"); + fprintf(stderr, "This happens if you want to run a kernel that uses instructions not available on your system.\n"); + exit(EXIT_FAILURE); +} + /* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */ @@ -126,7 +145,7 @@ int main(int argc, char** argv) double time; double cycPerUp = 0.0; double cycPerCL = 0.0; - const TestCase* test = NULL; + TestCase* test = NULL; uint64_t realSize = 0; uint64_t realIter = 0; uint64_t maxCycles = 0; @@ -141,8 +160,17 @@ int main(int argc, char** argv) binsertch(HLINE, 0, 80, '-'); binsertch(HLINE, 80, 1, '\n'); int (*ownprintf)(const char *format, ...); +#ifdef _ARCH_PPC + int clsize = 128; +#else int clsize = sysconf (_SC_LEVEL1_DCACHE_LINESIZE); +#endif + char compilers[512] = "gcc,icc,pgcc"; + char defcompilepath[512] = "/tmp"; + 
char compilepath[513] = ""; + char compileflags[512] = "-shared -fPIC"; ownprintf = &printf; + struct sigaction sig; /* Handling of command line options */ if (argc == 1) @@ -151,7 +179,23 @@ int main(int argc, char** argv) exit(EXIT_SUCCESS); } - while ((c = getopt (argc, argv, "w:t:s:l:aphvi:")) != -1) { + while ((c = getopt (argc, argv, "W:w:t:s:l:aphvi:f:")) != -1) { + switch (c) + { + case 'f': + tmp = snprintf(compilepath, 512, "%s", optarg); + if (tmp > 0 && tmp < 512) /* snprintf returns the untruncated length; never index past the buffer */ + { + compilepath[tmp] = '\0'; + } + break; + default: + break; + } + } + optind = 0; + + while ((c = getopt (argc, argv, "W:w:t:s:l:aphvi:f:")) != -1) { switch (c) { case 'h': @@ -162,8 +206,29 @@ int main(int argc, char** argv) exit (EXIT_SUCCESS); case 'a': ownprintf(TESTS"\n"); + + struct bstrList* l = dynbench_getall(); + if (l) + { + ownprintf("\nUser benchmarks:\n"); + for (i = 0; i < l->qty; i++) + { + if (dynbench_test(l->entry[i])) + { + TestCase* t = NULL; + int err = dynbench_load(l->entry[i], &t, NULL, NULL, NULL); + if (!err && t) + { + printf("%s - %s\n", t->name, t->desc); + dynbench_close(t, NULL); + } + } + } + bstrListDestroy(l); + } exit (EXIT_SUCCESS); case 'w': + case 'W': numberOfWorkgroups++; break; case 's': @@ -180,15 +245,22 @@ int main(int argc, char** argv) case 'l': bdestroy(testcase); testcase = bfromcstr(optarg); + int builtin = 1; for (i=0; i 0) compilepath[ret] = '\0'; + } + dynbench_load(testcase, &test, compilepath, compilers, compileflags); + } + if (test == NULL) { fprintf (stderr, "Error: Unknown test case %s\n",optarg); @@ -287,6 +373,8 @@ int main(int argc, char** argv) } bdestroy(testcase); break; + case 'f': + break; case '?': if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); @@ -322,6 +410,11 @@ int main(int argc, char** argv) affinity_init(); timer_init(); + memset(&sig, 0, sizeof(struct sigaction)); + sig.sa_sigaction = illhandler; + sig.sa_flags = SA_SIGINFO; + sigaction(SIGILL, &sig, NULL); + if (optPrintDomains) { bdestroy(testcase); 
@@ -343,21 +436,28 @@ int main(int argc, char** argv) allocator_init(numberOfWorkgroups * MAX_STREAMS); groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup)); + memset(groups, 0, numberOfWorkgroups*sizeof(Workgroup)); tmp = 0; optind = 0; - while ((c = getopt (argc, argv, "w:t:s:l:i:aphv")) != -1) + while ((c = getopt (argc, argv, "W:w:t:s:l:i:aphvf:")) != -1) /* f: must be listed here too, otherwise -f is reported as an unknown option in this pass */ { switch (c) { case 'w': + case 'W': currentWorkgroup = groups+tmp; bstring groupstr = bfromcstr(optarg); + if (c == 'W') + { + currentWorkgroup->init_per_thread = 1; + } i = bstr_to_workgroup(currentWorkgroup, groupstr, test->type, test->streams); bdestroy(groupstr); size_t newsize = 0; size_t stride = test->stride; int nrThreads = currentWorkgroup->numberOfThreads; + int clsize = 128; /* NOTE(review): shadows the clsize computed at the top of main() and pins it to 128 on every architecture - confirm this is intended */ size_t orig_size = currentWorkgroup->size; if (i == 0) { @@ -404,7 +504,8 @@ int main(int argc, char** argv) currentWorkgroup->streams[i].offset, test->type, test->stride, - currentWorkgroup->streams[i].domain); + currentWorkgroup->streams[i].domain, + currentWorkgroup->init_per_thread && nrThreads > 1); } tmp++; } @@ -413,16 +514,48 @@ int main(int argc, char** argv) exit(EXIT_FAILURE); } if (newsize != currentWorkgroup->size) + { currentWorkgroup->size = newsize; + } + if (nrThreads > 1) + { + if (currentWorkgroup->init_per_thread) + { + printf("Initialization: Each thread in domain initializes its own stream chunks\n"); + } + else + { + printf("Initialization: First thread in domain initializes the whole stream\n"); + } + } break; default: continue; break; } } + if (numberOfWorkgroups > 1) + { + int g0_numberOfThreads = groups[0].numberOfThreads; + uint64_t g0_size = groups[0].size; /* Workgroup.size is uint64_t; an int would truncate large sizes */ + for (i = 1; i < numberOfWorkgroups; i++) + { + if (g0_numberOfThreads != groups[i].numberOfThreads) + { + fprintf (stderr, "Warning: Multiple workgroups with different thread counts are not recommended. 
Use with care!\n"); + break; + } + } + for (i = 1; i < numberOfWorkgroups; i++) + { + if (g0_size != groups[i].size) + { + fprintf (stderr, "Warning: Multiple workgroups with different sizes are not recommended. Use with care!\n"); + break; + } + } + } - /* :WARNING:05/04/2010 08:58:05 AM:jt: At the moment the thread - * module only allows equally sized thread groups*/ for (i=0; istreams * sizeof(void*)); @@ -555,8 +689,8 @@ int main(int argc, char** argv) ownprintf("Iterations:\t\t%" PRIu64 "\n", realIter); ownprintf("Iterations per thread:\t%" PRIu64 "\n",iters_per_thread); ownprintf("Inner loop executions:\t%d\n", (int)(((double)realSize)/((double)test->stride*globalNumberOfThreads))); - ownprintf("Size (Byte):\t\t%" PRIu64 "\n", realSize * test->bytes ); - ownprintf("Size per thread:\t%" PRIu64 "\n", size_per_thread * test->bytes); + ownprintf("Size (Byte):\t\t%" PRIu64 "\n", realSize * datatypesize * test->streams); + ownprintf("Size per thread:\t%" PRIu64 "\n", size_per_thread * datatypesize * test->streams); ownprintf("Number of Flops:\t%" PRIu64 "\n", (iters_per_thread * realSize * test->flops)); ownprintf("MFlops/s:\t\t%.2f\n", 1.0E-06 * ((double) (iters_per_thread * realSize * test->flops) / time)); @@ -637,7 +771,11 @@ int main(int argc, char** argv) LIKWID_MARKER_CLOSE; #endif + if (test->dlhandle != NULL) + { + dynbench_close(test, compilepath); + } + bdestroy(HLINE); return EXIT_SUCCESS; } - diff --git a/bench/perl/AsmGen.pl b/bench/perl/AsmGen.pl index fb47a4239..f2387da9e 100755 --- a/bench/perl/AsmGen.pl +++ b/bench/perl/AsmGen.pl @@ -198,7 +198,7 @@ END | LOOP SYMBOL INUMBER SREG block {[ {FUNC => 'as::loop_entry', - ARGS => [$item{SYMBOL}[1],$item{SREG}[1]]}, + ARGS => [$item{SYMBOL}[1],$item{INUMBER}[1][1]]}, $item{block}, {FUNC => 'as::loop_exit', ARGS => [$item{SYMBOL}[1],$item{INUMBER}[1][1]]} diff --git a/bench/perl/gas.pm b/bench/perl/gas.pm index 1a74c7c90..125695b6c 100644 --- a/bench/perl/gas.pm +++ b/bench/perl/gas.pm @@ -1,11 +1,51 @@ 
-#!/usr/bin/env perl +#!/usr/bin/perl +# ======================================================================================= +# +# Filename: gas.pm +# +# Description: Implements gas callbacks for likwid asm parser. +# +# Version: +# Released: +# +# Author: Jan Treibig (jt), jan.treibig@gmail.com +# Project: likwid +# +# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . +# +# ======================================================================================= package as; use Data::Dumper; use isax86; use isax86_64; -use isaarmv7; -use isaarmv8; +use isappc64; +# Set the assembler header/footer for $main::ISA; the ISA names match the ones tested in isa_init ('x86', 'x86-64', 'ppc64'). +sub init +{ + if ($main::ISA eq 'x86') { + $AS = { HEADER => '.intel_syntax noprefix', + FOOTER => ''}; + } elsif ($main::ISA eq 'x86-64') { + $AS = { HEADER => '.intel_syntax noprefix', + FOOTER => ''}; + } elsif ($main::ISA eq 'ppc64') { + $AS = { HEADER => '', + FOOTER => ''}; + } +} $LOCAL = {}; $MODE = 'GLOBAL'; @@ -16,267 +56,261 @@ my $STACKPTR; my $BASEPTR; my $REG; my $ARG; +my $ALIGN='64'; sub emit_code { - my $code = shift; - $code =~ s/([GF]PR[0-9]+)/$REG->{$1}/g; - $code =~ s/(ARG[0-9]+)/$ARG->{$1}/g; - $code =~ s/(LOCAL[0-9]+)/$LOCAL->{$1}/g; - print "$code\n"; + my $code = shift; + $code =~ s/([GF]PR[0-9]+)/$REG->{$1}/g; + $code =~ s/(ARG[0-9]+)/$ARG->{$1}/g; + $code =~ s/(LOCAL[0-9]+)/$LOCAL->{$1}/g; + print "$code\n"; } sub align { - my $number = shift; - print ".align 
$number\n"; + my $number = shift; + print ".align $number\n"; } sub mode { - $cmd = shift; + $cmd = shift; - if ($cmd eq 'START') { - $MODE = 'LOCAL'; - } elsif ($cmd eq 'STOP') { - $MODE = 'GLOBAL'; - } + if ($cmd eq 'START') { + $MODE = 'LOCAL'; + } elsif ($cmd eq 'STOP') { + $MODE = 'GLOBAL'; + } } sub function_entry { - my $symbolname = shift; - my $allocate = shift; - my $distance; + my $symbolname = shift; + my $allocate = shift; + my $distance; - foreach ( (0 .. $allocate) ) { - $distance = $_ * $WORDLENGTH; - $LOCAL->{"LOCAL$_"} = "[$BASEPTR-$distance]"; - } + foreach ( (0 .. $allocate) ) { + $distance = $_ * $WORDLENGTH; + $LOCAL->{"LOCAL$_"} = "[$BASEPTR-$distance]"; + } - if($CURRENT_SECTION ne 'text') { - $CURRENT_SECTION = 'text'; - print ".text\n"; - } + if($CURRENT_SECTION ne 'text') { + $CURRENT_SECTION = 'text'; + print ".text\n"; + } - print ".global $symbolname\n"; + if ($main::ISA eq 'x86') { + print ".globl $symbolname\n"; + print ".type $symbolname, \@function\n"; + print "$symbolname :\n"; + print "push ebp\n"; + print "mov ebp, esp\n"; + $distance = $allocate * $WORDLENGTH; + print "sub esp, $distance\n" if ($allocate); + print "push ebx\n"; + print "push esi\n"; + print "push edi\n"; + } elsif ($main::ISA eq 'x86-64') { + print ".globl $symbolname\n"; + print ".type $symbolname, \@function\n"; + print "$symbolname :\n"; + print "push rbp\n"; + print "mov rbp, rsp\n"; + $distance = $allocate * $WORDLENGTH; + print "sub rsp, $distance\n" if ($allocate); + print "push rbx\n"; + print "push r12\n"; + print "push r13\n"; + print "push r14\n"; + print "push r15\n"; + } elsif ($main::ISA eq 'ppc64') { + #if ($main::ISA eq 'ppc64') { + print ".set r0,0; .set SP,1; .set RTOC,2; .set r3,3; .set r4,4;\n"; + print ".set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9; .set r10,10\n"; + print ".set x0,0; .set x1,1; .set x2,2; .set x3,3; .set x4,4\n"; + print ".set x5,5; .set x6,6; .set x7,7; .set x8,8; .set x9,9;\n"; + print ".set vec0,0; .set vec1,1; 
.set vec2,2; .set vec3,3;\n"; + print ".set vec4,4; .set vec5,5; .set vec6,6; .set vec7,7;\n"; + print ".set vec8,8; .set vec9,9; .set vec10,10; .set vec11,11;\n"; + print ".set vec12,12;\n"; + #} + print ".abiversion 2\n"; + print ".section \".toc\",\"aw\"\n"; + print ".section \".text\"\n"; + print ".align 2\n"; + print ".globl $symbolname\n"; + print ".type $symbolname, \@function\n"; + print "$symbolname :\n"; + print ".L.$symbolname:\n"; + print ".localentry $symbolname, .-$symbolname\n"; - if ($main::ISA eq 'ARMv7' or $main::ISA eq 'ARMv8') { - print ".type $symbolname, %function\n"; - } else { - print ".type $symbolname, \@function\n"; - } - print "$symbolname :\n"; - - if ($main::ISA eq 'x86') { - print "push ebp\n"; - print "mov ebp, esp\n"; - $distance = $allocate * $WORDLENGTH; - print "sub esp, $distance\n" if ($allocate); - print "push ebx\n"; - print "push esi\n"; - print "push edi\n"; - } elsif ($main::ISA eq 'x86-64') { - print "push rbp\n"; - print "mov rbp, rsp\n"; - $distance = $allocate * $WORDLENGTH; - print "sub rsp, $distance\n" if ($allocate); - print "push rbx\n"; - print "push r12\n"; - print "push r13\n"; - print "push r14\n"; - print "push r15\n"; - } elsif ($main::ISA eq 'ARMv7') { - print "push {r4-r7, lr}\n"; - print "add r7, sp, #12\n"; - print "push {r8, r10, r11}\n"; - print "vstmdb sp!, {d8-d15}\n"; - } + } } sub function_exit { - my $symbolname = shift; + my $symbolname = shift; - $LOCAL = {}; + $LOCAL = {}; - if ($main::ISA eq 'x86') { - print "pop edi\n"; - print "pop esi\n"; - print "pop ebx\n"; - print "mov esp, ebp\n"; - print "pop ebp\n"; - print "ret\n"; - } elsif ($main::ISA eq 'x86-64') { - print "pop r15\n"; - print "pop r14\n"; - print "pop r13\n"; - print "pop r12\n"; - print "pop rbx\n"; - print "mov rsp, rbp\n"; - print "pop rbp\n"; - print "ret\n"; - } elsif ($main::ISA eq 'ARMv7') { - print "vldmia sp!, {d8-d15}\n"; - print "pop {r8, r10, r11}\n"; - print "pop {r4-r7, pc}\n"; - } elsif ($main::ISA eq 'ARMv8') { - 
print ".exit:\n"; - print "\tret\n"; - } - print ".size $symbolname, .-$symbolname\n"; - print "\n"; + if ($main::ISA eq 'x86') { + print "pop edi\n"; + print "pop esi\n"; + print "pop ebx\n"; + print "mov esp, ebp\n"; + print "pop ebp\n"; + print "ret\n"; + print ".size $symbolname, .-$symbolname\n"; + } elsif ($main::ISA eq 'x86-64') { + print "pop r15\n"; + print "pop r14\n"; + print "pop r13\n"; + print "pop r12\n"; + print "pop rbx\n"; + print "mov rsp, rbp\n"; + print "pop rbp\n"; + print "ret\n"; + print ".size $symbolname, .-$symbolname\n"; + } elsif ($main::ISA eq 'ppc64') { + print "blr\n"; + print ".size $symbolname, .-$symbolname\n"; + } + #print ".size $symbolname, .-$symbolname\n"; + print "\n"; } sub define_data { - my $symbolname = shift; - my $type = shift; - my $value = shift; + my $symbolname = shift; + my $type = shift; + my $value = shift; - if($CURRENT_SECTION ne 'data') { - $CURRENT_SECTION = 'data'; - print ".data\n"; - } - if ($main::ISA ne 'ARMv7' and $main::ISA ne 'ARMv8') { - print ".align 64\n"; - print "$symbolname:\n"; - if ($type eq 'DOUBLE') { - print ".double $value, $value, $value, $value, $value, $value, $value, $value\n" - } elsif ($type eq 'SINGLE') { - print ".single $value, $value, $value, $value, $value, $value, $value, $value\n" - } elsif ($type eq 'INT') { - print ".int $value, $value\n" - } - } + if($CURRENT_SECTION ne 'data') { + $CURRENT_SECTION = 'data'; + print ".data\n"; + } + print ".align $ALIGN\n"; + print "$symbolname:\n"; + if ($type eq 'DOUBLE') { + print ".double $value, $value, $value, $value, $value, $value, $value, $value\n" + } elsif ($type eq 'SINGLE') { + print ".single $value, $value, $value, $value, $value, $value, $value, $value\n" + } elsif ($type eq 'INT') { + print ".int $value, $value\n" + } } sub define_offset { - my $symbolname = shift; - my $type = shift; - my $value = shift; + my $symbolname = shift; + my $type = shift; + my $value = shift; - if($CURRENT_SECTION ne 'data') { - $CURRENT_SECTION 
= 'data'; - print ".data\n"; - } - if ($main::ISA eq 'ARMv7' or $main::ISA eq 'ARMv8') { - print ".align 2\n"; - } else { - print ".align 16\n"; - } - print "$symbolname:\n"; - print ".int $value\n"; + if($CURRENT_SECTION ne 'data') { + $CURRENT_SECTION = 'data'; + print ".data\n"; + } + print ".align $ALIGN\n"; + print "$symbolname:\n"; + print ".int $value\n"; } sub loop_entry { - my $symbolname = shift; - my $stopping_criterion = shift; - $stopping_criterion = $REG->{$stopping_criterion} if( exists $REG->{$stopping_criterion}); + my $symbolname = shift; + #my $stopping_criterion = shift; + my $step = shift; - if ($main::ISA eq 'x86') { - print "xor eax, eax\n"; - } elsif ($main::ISA eq 'x86-64') { - print "xor rax, rax\n"; - } elsif ($main::ISA eq 'ARMv7') { - print "mov r4, #0\n"; - } elsif ($main::ISA eq 'ARMv8') { - print "\tmov x6, 0\n"; - print ".loop:\n"; - #print "\tcmp w0, w6\n"; - #print "\tblt .exit\n"; - } - if ($main::ISA eq 'ARMv7') { - print ".align 2\n"; - } elsif ($main::ISA eq 'ARMv8') { - print "\n"; - } else { - print ".align 16\n"; - } - if ($MODE eq 'GLOBAL') { - print "$symbolname :\n"; - } elsif ($main::ISA ne 'ARMv8') { - print "1:\n"; - } + if ($main::ISA eq 'x86') { + print "xor eax, eax\n"; + print ".align $ALIGN\n"; + if ($MODE eq 'GLOBAL') { + print "$symbolname :\n"; + } else { + print "1:\n"; + } + } elsif ($main::ISA eq 'x86-64') { + print "xor rax, rax\n"; + print ".align $ALIGN\n"; + if ($MODE eq 'GLOBAL') { + print "$symbolname :\n"; + } else { + print "1:\n"; + } + } elsif ($main::ISA eq 'ppc64') { + print "mr r0, r10\n"; # save r10 in r0; li would load the constant 10 because r10 is a .set symbol + print "li r10, $step\n"; + print "divd r10, r3, r10\n"; + print "mtctr r10\n"; + print "mr r10, r0\n"; # restore the value r10 held before the loop count was computed + print "$symbolname:\n"; + } } sub loop_exit { - my $symbolname = shift; - my $step = shift; + my $symbolname = shift; + my $step = shift; - if ($main::ISA eq 'x86') { - print "add eax, $step\n"; - print "cmp eax, edi\n"; - } elsif ($main::ISA eq 'x86-64') { - print "add rax, $step\n"; - print "cmp rax, 
rdi\n"; - } elsif ($main::ISA eq 'ARMv7') { - print "add r4, #$step\n"; - print "cmp r4, r0\n"; - } elsif ($main::ISA eq 'ARMv8') { - print "\tadd x6, x6, #$step\n"; - print "\tcmp x6, x0\n"; - print "\tblt .loop\n"; - } - if ($MODE eq 'GLOBAL') { - print "jl $symbolname\n"; - }else { - if ($main::ISA eq 'ARMv7') { - print "blt 1b\n"; - } elsif ($main::ISA eq 'ARMv8') { - #print "bgt 1b\n"; - print "\n"; - } else { - print "jl 1b\n"; - } - } - print "\n"; + if ($main::ISA eq 'x86') { + print "add eax, $step\n"; + print "cmp eax, edi\n"; + if ($MODE eq 'GLOBAL') { + print "jl $symbolname\n"; + } else { + print "jl 1b\n"; + } + print "\n"; + } elsif ($main::ISA eq 'x86-64') { + print "add rax, $step\n"; + print "cmp rax, rdi\n"; + if ($MODE eq 'GLOBAL') { + print "jl $symbolname\n"; + } else { + print "jl 1b\n"; + } + print "\n"; + } elsif ($main::ISA eq 'ppc64') { + print "bdnz $symbolname\n"; + } } sub isa_init { - if ($main::ISA eq 'x86') { - $WORDLENGTH = $isax86::WORDLENGTH_X86 ; - $STACKPTR = $isax86::STACKPTR_X86 ; - $BASEPTR = $isax86::BASEPTR_X86 ; - $REG = $isax86::REG_X86; - $ARG = $isax86::ARG_X86 ; - $AS = { HEADER => '.intel_syntax noprefix', - FOOTER => '' }; - } elsif ($main::ISA eq 'x86-64') { - $WORDLENGTH = $isax86_64::WORDLENGTH_X86_64; - $STACKPTR = $isax86_64::STACKPTR_X86_64 ; - $BASEPTR = $isax86_64::BASEPTR_X86_64 ; - $REG = $isax86_64::REG_X86_64; - $ARG = $isax86_64::ARG_X86_64 ; - $AS = { HEADER => '.intel_syntax noprefix', - FOOTER => '' }; - } elsif ($main::ISA eq 'ARMv7') { - $BASEPTR = $isaarmv7::BASEPTR_ARMv7; - $WORDLENGTH = $isaarmv7::WORDLENGTH_ARMv7; - $STACKPTR = $isaarmv7::STACKPTR_ARMv7 ; - $REG = $isaarmv7::REG_ARMv7; - $ARG = $isaarmv7::ARG_ARMv7 ; - $AS = { HEADER => ".cpu cortex-a15\n.fpu neon-vfpv4", - FOOTER => '' }; - } elsif ($main::ISA eq 'ARMv8') { - $BASEPTR = $isaarmv8::BASEPTR_ARMv8; - $WORDLENGTH = $isaarmv8::WORDLENGTH_ARMv8; - $STACKPTR = $isaarmv8::STACKPTR_ARMv8 ; - $REG = $isaarmv8::REG_ARMv8; - $ARG = 
$isaarmv8::ARG_ARMv8 ; - $AS = { HEADER => ".cpu generic+fp+simd", - FOOTER => '' }; - - } - + if ($main::ISA eq 'x86') { + $WORDLENGTH = $isax86::WORDLENGTH_X86 ; + $STACKPTR = $isax86::STACKPTR_X86 ; + $BASEPTR = $isax86::BASEPTR_X86 ; + $REG = $isax86::REG_X86; + $ARG = $isax86::ARG_X86 ; + $AS = { HEADER => '.intel_syntax noprefix', + FOOTER => ''}; + $ALIGN = '64'; + } elsif ($main::ISA eq 'x86-64') { + $WORDLENGTH = $isax86_64::WORDLENGTH_X86_64; + $STACKPTR = $isax86_64::STACKPTR_X86_64 ; + $BASEPTR = $isax86_64::BASEPTR_X86_64 ; + $REG = $isax86_64::REG_X86_64; + $ARG = $isax86_64::ARG_X86_64 ; + $AS = { HEADER => '.intel_syntax noprefix', + FOOTER => ''}; + $ALIGN = '64'; + } elsif ($main::ISA eq 'ppc64') { + $WORDLENGTH = $isappc64::WORDLENGTH_PPC64; + $STACKPTR = $isappc64::STACKPTR_PPC64 ; + $BASEPTR = $isappc64::BASEPTR_PPC64 ; + $REG = $isappc64::REG_PPC64; + $ARG = $isappc64::ARG_PPC64 ; + $AS = { HEADER => '', + FOOTER => ''}; + $ALIGN = '16'; + } } + 1; diff --git a/bench/perl/generatePas.pl b/bench/perl/generatePas.pl index 81dde59b6..9ce2240a1 100755 --- a/bench/perl/generatePas.pl +++ b/bench/perl/generatePas.pl @@ -114,6 +114,7 @@ $file =~ /([A-Za-z_0-9]+)\.ptt/; $name = $1; + if ($name =~ /^$/) { next; } $isLoop = 0; $skip=0; @@ -121,6 +122,7 @@ $prolog=''; $loop=''; $desc=''; + $streams=1; my $loads=-1; my $stores=-1; my $branches=-1; diff --git a/bench/perl/isappc64.pm b/bench/perl/isappc64.pm new file mode 100644 index 000000000..8bdca1234 --- /dev/null +++ b/bench/perl/isappc64.pm @@ -0,0 +1,89 @@ +#!/usr/bin/perl + +package isappc64; + +$WORDLENGTH_PPC64 = 8; +$STACKPTR_PPC64 = '1'; +$BASEPTR_PPC64 = '2'; + +$REG_PPC64 = { GPR1 => '3', + GPR2 => '4', + GPR3 => '5', + GPR4 => '6', + GPR5 => '7', + GPR6 => '8', + GPR7 => '9', + GPR8 => '10', + GPR9 => '11', + GPR10 => '12', + GPR11 => '13', + GPR12 => '14', + GPR13 => '15', + GPR14 => '16', + GPR15 => '17', + GPR16 => '18', + GPR17 => '19', + GPR18 => '20', + GPR19 => '21', + GPR20 => 
'22', + GPR21 => '23', + GPR22 => '24', + GPR23 => '25', + GPR24 => '26', + GPR25 => '27', + GPR26 => '28', + GPR27 => '29', + GPR28 => '30', + GPR29 => '31', + FPR1 => '0', + FPR2 => '1', + FPR3 => '2', + FPR4 => '3', + FPR5 => '4', + FPR6 => '5', + FPR7 => '6', + FPR8 => '7', + FPR9 => '8', + FPR10 => '9', + FPR11 => '10', + FPR12 => '11', + FPR13 => '12', + FPR14 => '13', + FPR15 => '14', + FPR16 => '15', + FPR17 => '16', + FPR18 => '17', + FPR19 => '18', + FPR20 => '19', + FPR21 => '20', + FPR22 => '21', + FPR23 => '22', + FPR24 => '23', + FPR25 => '24', + FPR26 => '25', + FPR27 => '26', + FPR28 => '27', + FPR29 => '28', + FPR30 => '29', + FPR31 => '30', + FPR32 => '31'}; + +$ARG_PPC64 = { ARG1 => '3', + ARG2 => '4', + ARG3 => '5', + ARG4 => '6', + ARG5 => '7', + ARG6 => '8', + ARG7 => '9', + ARG8 => '10', + ARG9 => '[rbp+56]'}; +# Print one line of code with GPRn/FPRn placeholders replaced from this package's register table. +sub emit_code +{ + my $code = shift; + $code =~ s/([GF]PR[0-9]+)/$REG_PPC64->{$1}/g; + print "$code\n"; +} + + +1; diff --git a/bench/power/add_scalar4.ptt b/bench/power/add_scalar4.ptt new file mode 100644 index 000000000..082d0ac45 --- /dev/null +++ b/bench/power/add_scalar4.ptt @@ -0,0 +1,36 @@ +STREAMS 3 +TYPE DOUBLE +DESC Double-precision add, only scalar operations +FLOPS 1 +BYTES 24 +LOADS 2 +STORES 1 +INSTR_LOOP 19 +INSTR_CONST 5 +UOPS 19 + +LOOP 4 + +lfd 1, 0(STR0) +lfd 2, 0(STR1) +fadd 0, 1, 2 +stfd 0, 0(STR2) + +lfd 4, 8(STR0) +lfd 5, 8(STR1) +fadd 3, 4, 5 +stfd 3, 8(STR2) + +lfd 7, 16(STR0) +lfd 8, 16(STR1) +fadd 6, 7, 8 +stfd 6, 16(STR2) + +lfd 10, 24(STR0) +lfd 11, 24(STR1) +fadd 9, 10, 11 +stfd 9, 24(STR2) + +addi STR0, STR0, 32 +addi STR1, STR1, 32 +addi STR2, STR2, 32 diff --git a/bench/power/add_vsx1.ptt b/bench/power/add_vsx1.ptt new file mode 100644 index 000000000..0c5f35a36 --- /dev/null +++ b/bench/power/add_vsx1.ptt @@ -0,0 +1,13 @@ +STREAMS 3 +TYPE DOUBLE +FLOPS 1 +BYTES 24 + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) + +LOOP 2 +lxvd2x x1, STR0, r7 +lxvd2x x2, STR1, r7 
+xvadddp x0, x1, x2 +stxvd2x x0, STR2, r7 +addi r7, r7, 16 diff --git a/bench/power/add_vsx2.ptt b/bench/power/add_vsx2.ptt new file mode 100644 index 000000000..9f3018814 --- /dev/null +++ b/bench/power/add_vsx2.ptt @@ -0,0 +1,20 @@ +STREAMS 3 +TYPE DOUBLE +FLOPS 1 +BYTES 24 + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) + +LOOP 4 +lxvd2x x1, STR0, r7 +lxvd2x x2, STR1, r7 +xvadddp x0, x1, x2 +stxvd2x x0, STR2, r7 +addi r7, r7, 32 + +lxvd2x x1, STR0, r8 +lxvd2x x2, STR1, r8 +xvadddp x0, x1, x2 +stxvd2x x0, STR2, r8 +addi r8, r8, 32 diff --git a/bench/power/add_vsx4.ptt b/bench/power/add_vsx4.ptt new file mode 100644 index 000000000..e91f0971b --- /dev/null +++ b/bench/power/add_vsx4.ptt @@ -0,0 +1,34 @@ +STREAMS 3 +TYPE DOUBLE +FLOPS 1 +BYTES 24 + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +LOOP 8 +lxvd2x x1, STR0, r7 +lxvd2x x2, STR1, r7 +xvadddp x0, x1, x2 +stxvd2x x0, STR2, r7 +addi r7, r7, 64 + +lxvd2x x3, STR0, r8 +lxvd2x x4, STR1, r8 +xvadddp x5, x3, x4 +stxvd2x x5, STR2, r8 +addi r8, r8, 64 + +lxvd2x x1, STR0, r9 +lxvd2x x2, STR1, r9 +xvadddp x0, x1, x2 +stxvd2x x0, STR2, r9 +addi r9, r9, 64 + +lxvd2x x1, STR0, r10 +lxvd2x x2, STR1, r10 +xvadddp x0, x1, x2 +stxvd2x x0, STR2, r10 +addi r10, r10, 64 diff --git a/bench/power/add_vsx4_mem.ptt b/bench/power/add_vsx4_mem.ptt new file mode 100644 index 000000000..11ad990de --- /dev/null +++ b/bench/power/add_vsx4_mem.ptt @@ -0,0 +1,38 @@ +STREAMS 3 +TYPE DOUBLE +FLOPS 1 +BYTES 24 + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) 
+li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +LOOP 8 +lxvd2x x1, STR0, r7 +lxvd2x x2, STR1, r7 +xvadddp x0, x1, x2 +dcbz STR2, r7 +stxvd2x x0, STR2, r7 +addi r7, r7, 64 + +lxvd2x x1, STR0, r8 +lxvd2x x2, STR1, r8 +xvadddp x0, x1, x2 +dcbz STR2, r8 +stxvd2x x0, STR2, r8 +addi r8, r8, 64 + +lxvd2x x1, STR0, r9 +lxvd2x x2, STR1, r9 +xvadddp x0, x1, x2 +dcbz STR2, r9 +stxvd2x x0, STR2, r9 +addi r9, r9, 64 + +lxvd2x x1, STR0, r10 +lxvd2x x2, STR1, r10 +xvadddp x0, x1, x2 +dcbz STR2, r10 +stxvd2x x0, STR2, r10 +addi r10, r10, 64 diff --git a/bench/power/copy_scalar4.ptt b/bench/power/copy_scalar4.ptt new file mode 100644 index 000000000..6dd430add --- /dev/null +++ b/bench/power/copy_scalar4.ptt @@ -0,0 +1,21 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 0 +BYTES 16 + +LOOP 4 + +lfd 0, 0(STR0) +stfd 0, 0(STR1) + +lfd 1, 8(STR0) +stfd 1, 8(STR1) + +lfd 2, 16(STR0) +stfd 2, 16(STR1) + +lfd 3, 24(STR0) +stfd 3, 24(STR1) + +addi STR0, STR0, 32 +addi STR1, STR1, 32 diff --git a/bench/power/copy_vsx4.ptt b/bench/power/copy_vsx4.ptt new file mode 100644 index 000000000..85d43dd02 --- /dev/null +++ b/bench/power/copy_vsx4.ptt @@ -0,0 +1,31 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 0 +BYTES 16 +INSTR_LOOP 3 +INSTR_CONST 1 +UOPS 4 +LOADS 1 +STORES 1 + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +LOOP 8 +lxvd2x x1, STR0, r7 +stxvd2x x0, STR1, r7 +addi r7, r7, 64 + +lxvd2x x1, STR0, r8 +stxvd2x x0, STR1, r8 +addi r8, r8, 64 + +lxvd2x x1, STR0, r9 +stxvd2x x0, STR1, r9 +addi r9, r9, 64 + +lxvd2x x1, STR0, r10 +stxvd2x x0, STR1, r10 +addi r10, r10, 64 diff --git a/bench/power/daxpy_vsx4.ptt b/bench/power/daxpy_vsx4.ptt 
new file mode 100644 index 000000000..49fa744ac --- /dev/null +++ b/bench/power/daxpy_vsx4.ptt @@ -0,0 +1,46 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 2 +BYTES 24 + +.set vec0,0; .set vec1,1; .set vec2,2; .set vec3,3; +.set vec4,4; .set vec5,5; .set vec6,6; .set vec7,7; +.set vec8,8; .set vec9,9; .set vec10,10; .set vec11,11; + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +xvsubdp vec0, vec0, vec0 # set to zero +xvsubdp vec1, vec1, vec1 # set to zero +xvsubdp vec2, vec2, vec2 # set to zero +xvsubdp vec3, vec3, vec3 # set to zero + +LOOP 8 + +lxvx vec4, STR0, r7 +lxvx vec5, STR1, r7 + +lxvx vec6, STR0, r8 +lxvx vec7, STR1, r8 + +lxvx vec8, STR0, r9 +lxvx vec9, STR1, r9 + +lxvx vec10, STR0, r10 +lxvx vec11, STR1, r10 + +# NB: Not a real DAXPY but same instruction mix (we're multiplying x and y) +xvmaddadp vec0, vec4, vec5 +xvmaddadp vec1, vec6, vec7 +xvmaddadp vec2, vec8, vec9 +xvmaddadp vec3, vec10, vec11 + +stxvx vec0, STR0, r7 +stxvx vec1, STR0, r8 +stxvx vec2, STR0, r9 +stxvx vec3, STR0, r10 + +addi STR0, STR0, 64 +addi STR1, STR1, 64 diff --git a/bench/power/ddot_kahan_scalar4_dp.ptt b/bench/power/ddot_kahan_scalar4_dp.ptt new file mode 100644 index 000000000..2858e63c8 --- /dev/null +++ b/bench/power/ddot_kahan_scalar4_dp.ptt @@ -0,0 +1,85 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 2 +BYTES 16 + +li r10, 6 # sixteen (4x4) elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register + +# set accumulation registers to zero +fsub 0, 0, 0 +fsub 1, 1, 1 +fsub 2, 2, 2 +fsub 3, 3, 3 +fsub 4, 4, 4 +fsub 5, 5, 5 + +# set c registers to zero +fsub 6, 6, 6 +fsub 7, 7, 7 +fsub 8, 8, 8 +fsub 9, 9, 9 +fsub 10, 10, 10 +fsub 11, 11, 11 + +LOOP 
6 +# load A[i] +lfd 12, 0(STR0) +lfd 13, 8(STR0) +lfd 14, 16(STR0) +lfd 15, 24(STR0) +lfd 16, 32(STR0) +lfd 17, 40(STR0) + +# load B[i] +lfd 18, 0(STR1) +lfd 19, 8(STR1) +lfd 20, 16(STR1) +lfd 21, 24(STR1) +lfd 22, 32(STR1) +lfd 23, 40(STR1) + +# y = A[i]*B[i]-c +# fmsub FRT,FRA,FRC,FRB --> FRT <- [(FRA)×(FRC)] - (FRB) +fmsub 6, 12, 18, 6 +fmsub 7, 13, 19, 7 +fmsub 8, 14, 20, 8 +fmsub 9, 15, 21, 9 +fmsub 10, 16, 22, 10 +fmsub 11, 17, 23, 11 + +# t = sum + y +fadd 24, 0, 6 +fadd 25, 1, 7 +fadd 26, 2, 8 +fadd 27, 3, 9 +fadd 28, 4, 10 +fadd 29, 5, 11 + +# tmp = t - sum; we can writewrite registers holding 'sum' +fsub 0, 24, 0 +fsub 1, 25, 1 +fsub 2, 26, 2 +fsub 3, 27, 3 +fsub 4, 28, 4 +fsub 5, 29, 5 + +# c = tmp - y = (t - sum) - y; we can overwrite register holding 'y' +fsub 6, 0, 6 +fsub 7, 1, 7 +fsub 8, 2, 8 +fsub 9, 3, 9 +fsub 10, 4, 10 +fsub 11, 5, 11 + +# sum = t +fmr 0, 24 +fmr 1, 25 +fmr 2, 26 +fmr 3, 27 +fmr 4, 28 +fmr 5, 29 + +addi STR0, STR0, 48 +addi STR1, STR1, 48 diff --git a/bench/power/ddot_kahan_scalar4_sp.ptt b/bench/power/ddot_kahan_scalar4_sp.ptt new file mode 100644 index 000000000..68c1ec595 --- /dev/null +++ b/bench/power/ddot_kahan_scalar4_sp.ptt @@ -0,0 +1,85 @@ +STREAMS 2 +TYPE SINGLE +FLOPS 2 +BYTES 8 + +li r10, 6 # sixteen (4x4) elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register + +# set accumulation registers to zero +fsubs 0, 0, 0 +fsubs 1, 1, 1 +fsubs 2, 2, 2 +fsubs 3, 3, 3 +fsubs 4, 4, 4 +fsubs 5, 5, 5 + +# set c registers to zero +fsubs 6, 6, 6 +fsubs 7, 7, 7 +fsubs 8, 8, 8 +fsubs 9, 9, 9 +fsubs 10, 10, 10 +fsubs 11, 11, 11 + +LOOP 6 +# load A[i] +lfs 12, 0(STR0) +lfs 13, 4(STR0) +lfs 14, 8(STR0) +lfs 15, 12(STR0) +lfs 16, 16(STR0) +lfs 17, 20(STR0) + +# load B[i] +lfs 18, 0(STR1) +lfs 19, 4(STR1) +lfs 20, 8(STR1) +lfs 21, 12(STR1) +lfs 22, 16(STR1) +lfs 23, 20(STR1) + +# y = A[i]*B[i]-c +# fmsub FRT,FRA,FRC,FRB --> FRT <- [(FRA)×(FRC)] - (FRB) 
+fmsubs 6, 12, 18, 6 +fmsubs 7, 13, 19, 7 +fmsubs 8, 14, 20, 8 +fmsubs 9, 15, 21, 9 +fmsubs 10, 16, 22, 10 +fmsubs 11, 17, 23, 11 + +# t = sum + y +fadds 24, 0, 6 +fadds 25, 1, 7 +fadds 26, 2, 8 +fadds 27, 3, 9 +fadds 28, 4, 10 +fadds 29, 5, 11 + +# tmp = t - sum; we can writewrite registers holding 'sum' +fsubs 0, 24, 0 +fsubs 1, 25, 1 +fsubs 2, 26, 2 +fsubs 3, 27, 3 +fsubs 4, 28, 4 +fsubs 5, 29, 5 + +# c = tmp - y = (t - sum) - y; we can overwrite register holding 'y' +fsubs 6, 0, 6 +fsubs 7, 1, 7 +fsubs 8, 2, 8 +fsubs 9, 3, 9 +fsubs 10, 4, 10 +fsubs 11, 5, 11 + +# sum = t +fmr 0, 24 +fmr 1, 25 +fmr 2, 26 +fmr 3, 27 +fmr 4, 28 +fmr 5, 29 + +addi STR0, STR0, 24 +addi STR1, STR1, 24 diff --git a/bench/power/ddot_kahan_vsx4_dp.ptt b/bench/power/ddot_kahan_vsx4_dp.ptt new file mode 100644 index 000000000..d4b5c8e77 --- /dev/null +++ b/bench/power/ddot_kahan_vsx4_dp.ptt @@ -0,0 +1,91 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 2 +BYTES 16 + +li r10, 12 # sixteen (4x4) elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register +li r6, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r7, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r8, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r9, 48 # load immediate value of 48 into r10 (used as offset in addressing) +li r10, 64 # load immediate value of 48 into r10 (used as offset in addressing) +li 11, 80 # load immediate value of 48 into r10 (used as offset in addressing) + +# set accumulation registers to zero +xvsubdp 0, 0, 0 +xvsubdp 1, 1, 1 +xvsubdp 2, 2, 2 +xvsubdp 3, 3, 3 +xvsubdp 4, 4, 4 +xvsubdp 5, 5, 5 + +# set c registers to zero +xvsubdp 6, 6, 6 +xvsubdp 7, 7, 7 +xvsubdp 8, 8, 8 +xvsubdp 9, 9, 9 +xvsubdp 10, 10, 10 +xvsubdp 11, 11, 11 + +LOOP 12 +# load A[i] +lxvd2x 12, STR0, r6 +lxvd2x 13, STR0, r7 +lxvd2x 14, STR0, r8 +lxvd2x 15, STR0, r9 +lxvd2x 16, STR0, r10 
+lxvd2x 17, STR0, 11 + +# load B[i] +lxvd2x 18, STR1, r6 +lxvd2x 19, STR1, r7 +lxvd2x 20, STR1, r8 +lxvd2x 21, STR1, r9 +lxvd2x 22, STR1, r10 +lxvd2x 23, STR1, 11 + +# y = A[i]*B[i]-c +# xvmsubadp xt, xa, xb --> xt = xa*xb-xt +xvmsubadp 6, 12, 18 +xvmsubadp 7, 13, 19 +xvmsubadp 8, 14, 20 +xvmsubadp 9, 15, 21 +xvmsubadp 10, 16, 22 +xvmsubadp 11, 17, 23 + +# t = sum + y +xvadddp 24, 0, 6 +xvadddp 25, 1, 7 +xvadddp 26, 2, 8 +xvadddp 27, 3, 9 +xvadddp 28, 4, 10 +xvadddp 29, 5, 11 + +# tmp = t - sum; we can writewrite registers holding 'sum' +xvsubdp 0, 24, 0 +xvsubdp 1, 25, 1 +xvsubdp 2, 26, 2 +xvsubdp 3, 27, 3 +xvsubdp 4, 28, 4 +xvsubdp 5, 29, 5 + +# c = tmp - y = (t - sum) - y; we can overwrite register holding 'y' +xvsubdp 6, 0, 6 +xvsubdp 7, 1, 7 +xvsubdp 8, 2, 8 +xvsubdp 9, 3, 9 +xvsubdp 10, 4, 10 +xvsubdp 11, 5, 11 + +# sum = t +xvmovdp 0, 24 +xvmovdp 1, 25 +xvmovdp 2, 26 +xvmovdp 3, 27 +xvmovdp 4, 28 +xvmovdp 5, 29 + +addi STR0, STR0, 96 +addi STR1, STR1, 96 diff --git a/bench/power/ddot_kahan_vsx4_sp_reorder.ptt b/bench/power/ddot_kahan_vsx4_sp_reorder.ptt new file mode 100644 index 000000000..3086fbf45 --- /dev/null +++ b/bench/power/ddot_kahan_vsx4_sp_reorder.ptt @@ -0,0 +1,88 @@ +STREAMS 2 +TYPE SINGLE +FLOPS 2 +BYTES 8 + +li r10, 24 # sixteen (4x4) elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register +li r6, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r7, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r8, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r9, 48 # load immediate value of 48 into r10 (used as offset in addressing) +li r10, 64 # load immediate value of 48 into r10 (used as offset in addressing) +li 11, 80 # load immediate value of 48 into r10 (used as offset in addressing) + +# set accumulation registers to zero +xvsubsp 0, 0, 0 +xvsubsp 1, 1, 1 +xvsubsp 2, 2, 2 +xvsubsp 3, 
3, 3 +xvsubsp 4, 4, 4 +xvsubsp 5, 5, 5 + +# set c registers to zero +xvsubsp 6, 6, 6 +xvsubsp 7, 7, 7 +xvsubsp 8, 8, 8 +xvsubsp 9, 9, 9 +xvsubsp 10, 10, 10 +xvsubsp 11, 11, 11 + +LOOP 24 +# load A[i] +lxvd2x 12, STR0, r6 +lxvd2x 18, STR1, r6 +lxvd2x 13, STR0, r7 +lxvd2x 19, STR1, r7 +xvmsubasp 26, 14, 20 +xvmsubasp 27, 15, 21 +xvaddsp 34, 4, 28 +xvaddsp 35, 5, 29 +xvsubsp 36, 30, 0 +xvsubsp 37, 31, 1 +xvsubsp 8, 38, 26 +xvsubsp 9, 39, 27 +xvmovsp 4, 34 +xvmovsp 5, 35 + +lxvd2x 14, STR0, r8 +lxvd2x 20, STR1, r8 +lxvd2x 15, STR0, r9 +lxvd2x 21, STR1, r9 +xvmsubasp 28, 16, 22 +xvmsubasp 29, 17, 23 +xvaddsp 30, 0, 24 +xvaddsp 31, 1, 25 +xvsubsp 38, 32, 2 +xvsubsp 39, 33, 3 +xvsubsp 10, 40, 28 +xvsubsp 11, 41, 29 +xvmovsp 0, 30 +xvmovsp 1, 31 + +lxvd2x 16, STR0, r10 +lxvd2x 22, STR1, r10 +lxvd2x 17, STR0, 11 +lxvd2x 23, STR1, 11 + addi STR0, STR0, 96 + addi STR1, STR1, 96 +xvmsubasp 24, 12, 18 +xvmsubasp 25, 13, 19 +xvaddsp 32, 2, 26 +xvaddsp 33, 3, 27 +xvsubsp 40, 34, 4 +xvsubsp 41, 35, 5 +xvsubsp 6, 36, 24 +xvsubsp 7, 37, 25 +xvmovsp 2, 32 +xvmovsp 3, 33 + + + +# tmp = t - sum; we can writewrite registers holding 'sum' + +# c = tmp - y = (t - sum) - y; we can overwrite register holding 'y' + +# sum = t + diff --git a/bench/power/ddot_scalar4.ptt b/bench/power/ddot_scalar4.ptt new file mode 100644 index 000000000..e4dd2377f --- /dev/null +++ b/bench/power/ddot_scalar4.ptt @@ -0,0 +1,30 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 2 +BYTES 16 + +fsub 0, 0, 0 # zero +fsub 1, 1, 1 # zero +fsub 2, 2, 2 # zero +fsub 3, 3, 3 # zero + +LOOP 4 + +lfd 4, 0(STR0) +lfd 5, 0(STR1) +fmadd 0, 4, 5, 0 + +lfd 6, 8(STR0) +lfd 7, 8(STR1) +fmadd 1, 6, 7, 1 + +lfd 8, 16(STR0) +lfd 9, 16(STR1) +fmadd 2, 8, 9, 2 + +lfd 10, 24(STR0) +lfd 11, 24(STR1) +fmadd 3, 10, 11, 3 + +addi STR0, STR0, 32 +addi STR1, STR1, 32 diff --git a/bench/power/ddot_scalar4_sp.ptt b/bench/power/ddot_scalar4_sp.ptt new file mode 100644 index 000000000..c4a32b0e1 --- /dev/null +++ b/bench/power/ddot_scalar4_sp.ptt @@ -0,0 +1,33 
@@ +STREAMS 2 +TYPE SINGLE +FLOPS 2 +BYTES 8 + +li r10, 4 # four elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register +fsubs 0, 0, 0 # zero +fsubs 1, 1, 1 # zero +fsubs 2, 2, 2 # zero +fsubs 3, 3, 3 # zero + +LOOP 4 + +lfs 4, 0(STR0) +lfs 5, 0(STR1) +fmadds 0, 4, 5, 0 + +lfs 6, 4(STR0) +lfs 7, 4(STR1) +fmadds 1, 6, 7, 1 + +lfs 8, 8(STR0) +lfs 9, 8(STR1) +fmadds 2, 8, 9, 2 + +lfs 10, 12(STR0) +lfs 11, 12(STR1) +fmadds 3, 10, 11, 3 + +addi STR0, STR0, 16 +addi STR1, STR1, 16 diff --git a/bench/power/ddot_vsx4.ptt b/bench/power/ddot_vsx4.ptt new file mode 100644 index 000000000..3568157e7 --- /dev/null +++ b/bench/power/ddot_vsx4.ptt @@ -0,0 +1,35 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 2 +BYTES 16 + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +xvsubdp 0, 0, 0 # set to zero +xvsubdp 1, 1, 1 # set to zero +xvsubdp 2, 2, 2 # set to zero +xvsubdp 3, 3, 3 # set to zero + +LOOP 8 + +lxvd2x 4, STR0, r7 +lxvd2x 5, STR1, r7 +xvmaddadp 0, 4, 5 + +lxvd2x 6, STR0, r8 +lxvd2x 7, STR1, r8 +xvmaddadp 1, 6, 7 + +lxvd2x 8, STR0, r9 +lxvd2x 9, STR1, r9 +xvmaddadp 2, 8, 9 + +lxvd2x 10, STR0, r10 +lxvd2x 11, STR1, r10 +xvmaddadp 3, 10, 11 + +addi STR0, STR0, 64 +addi STR1, STR1, 64 diff --git a/bench/power/ddot_vsx4_sp.ptt b/bench/power/ddot_vsx4_sp.ptt new file mode 100644 index 000000000..4ca9a0261 --- /dev/null +++ b/bench/power/ddot_vsx4_sp.ptt @@ -0,0 +1,38 @@ +STREAMS 2 +TYPE SINGLE +FLOPS 2 +BYTES 8 + +li r10, 16 # eight (4x2) elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register +li r7, 0 # load immediate value of 0 into r7 (used as 
offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +xvsubsp 0, 0, 0 # set to zero +xvsubsp 1, 1, 1 # set to zero +xvsubsp 2, 2, 2 # set to zero +xvsubsp 3, 3, 3 # set to zero + +LOOP 16 + +lxvd2x 4, STR0, r7 +lxvd2x 5, STR1, r7 +xvmaddasp 0, 4, 5 + +lxvd2x 6, STR0, r8 +lxvd2x 7, STR1, r8 +xvmaddasp 1, 6, 7 + +lxvd2x 8, STR0, r9 +lxvd2x 9, STR1, r9 +xvmaddasp 2, 8, 9 + +lxvd2x 10, STR0, r10 +lxvd2x 11, STR1, r10 +xvmaddasp 3, 10, 11 + +addi STR0, STR0, 64 +addi STR1, STR1, 64 diff --git a/bench/power/ddot_vsx4_sp_new.ptt b/bench/power/ddot_vsx4_sp_new.ptt new file mode 100644 index 000000000..290f9bdf4 --- /dev/null +++ b/bench/power/ddot_vsx4_sp_new.ptt @@ -0,0 +1,41 @@ +STREAMS 2 +TYPE SINGLE +FLOPS 2 +BYTES 8 + +li r10, 16 # eight (4x2) elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +xvsubsp 0, 0, 0 # set to zero +xvsubsp 1, 1, 1 # set to zero +xvsubsp 2, 2, 2 # set to zero +xvsubsp 3, 3, 3 # set to zero + +LOOP 16 + +lxvd2x 4, STR0, r7 +lxvd2x 5, STR1, r7 +lxvd2x 6, STR0, r8 +lxvd2x 7, STR1, r8 + +addi STR0, STR0, 64 + +xvmaddasp 2, 8, 9 +xvmaddasp 3, 10, 11 + +lxvd2x 8, STR0, r9 +lxvd2x 9, STR1, r9 +lxvd2x 10, STR0, r10 +lxvd2x 11, STR1, r10 + +addi STR1, STR1, 64 + +xvmaddasp 0, 4, 5 +xvmaddasp 1, 6, 7 + + diff --git a/bench/power/gs_fwd_scalar.ptt b/bench/power/gs_fwd_scalar.ptt new file mode 100644 index 000000000..8c387caac --- /dev/null +++ 
b/bench/power/gs_fwd_scalar.ptt @@ -0,0 +1,32 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 3 +BYTES 16 + +.set vec0,0; .set vec1,1; .set vec2,2; .set vec3,3; +.set vec4,4; .set vec5,5; .set vec6,6; .set vec7,7; +.set vec8,8; .set vec9,9; .set vec10,10; .set vec11,11; + +mtctr r3 # move to count register +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) + +xssubdp vec0, vec0, vec0 # set to zero +xssubdp vec1, vec1, vec1 # set to zero +xssubdp vec2, vec2, vec2 # set to zero +xssubdp vec3, vec3, vec3 # set to zero +xssubdp vec4, vec4, vec4 # set to zero +xssubdp vec5, vec5, vec5 # set to zero +xssubdp vec10, vec10, vec10 # set to zero + +LOOP 1 + +lfd vec0, 0(STR0) +lfd vec10, 0(STR1) + +fmadd vec0, vec1, vec10, vec0 # uses both loaded values and a constant +fmadd vec0, vec3, vec4, vec0 # has to wait for vec0 from previous FMA, has to wait for vec0 from previous iteration's mul +fmul vec4, vec0, vec5 # has to wait for vec0 from previous FMA +stfd vec4, 0(STR1) + +addi STR0, STR0, 8 +addi STR1, STR1, 8 diff --git a/bench/power/gs_fwd_vsx4.ptt b/bench/power/gs_fwd_vsx4.ptt new file mode 100644 index 000000000..0f45f2e62 --- /dev/null +++ b/bench/power/gs_fwd_vsx4.ptt @@ -0,0 +1,32 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 3 +BYTES 16 + +.set vec0,0; .set vec1,1; .set vec2,2; .set vec3,3; +.set vec4,4; .set vec5,5; .set vec6,6; .set vec7,7; +.set vec8,8; .set vec9,9; .set vec10,10; .set vec11,11; + +mtctr r3 # move to count register +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) + +xvsubdp vec0, vec0, vec0 # set to zero +xvsubdp vec1, vec1, vec1 # set to zero +xvsubdp vec2, vec2, vec2 # set to zero +xvsubdp vec3, vec3, vec3 # set to zero +xvsubdp vec4, vec4, vec4 # set to zero +xvsubdp vec5, vec5, vec5 # set to zero +xvsubdp vec10, vec10, vec10 # set to zero + +LOOP 1 + +lxvx vec0, STR0, r7 +lxvx vec10, STR1, r7 + +xsmaddadp vec0, vec1, vec10 # uses both loaded values and a constant +xsmaddadp vec0, vec3, vec4 # has to wait for vec0 
from previous FMA, has to wait for vec0 from previous iteration's mul +xsmuldp vec4, vec0, vec5 # has to wait for vec0 from previous FMA +stxvx vec4, STR1, r7 + +addi STR0, STR0, 8 +addi STR1, STR1, 8 diff --git a/bench/power/gs_fwd_vsx4_alt.ptt b/bench/power/gs_fwd_vsx4_alt.ptt new file mode 100644 index 000000000..06e74cc61 --- /dev/null +++ b/bench/power/gs_fwd_vsx4_alt.ptt @@ -0,0 +1,31 @@ +STREAMS 2 +TYPE DOUBLE +FLOPS 3 +BYTES 16 + +.set vec0,0; .set vec1,1; .set vec2,2; .set vec3,3; +.set vec4,4; .set vec5,5; .set vec6,6; .set vec7,7; +.set vec8,8; .set vec9,9; .set vec10,10; .set vec11,11; + +mtctr r3 # move to count register +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) + +xvsubdp vec0, vec0, vec0 # set to zero +xvsubdp vec1, vec1, vec1 # set to zero +xvsubdp vec2, vec2, vec2 # set to zero +xvsubdp vec3, vec3, vec3 # set to zero +xvsubdp vec4, vec4, vec4 # set to zero +xvsubdp vec5, vec5, vec5 # set to zero +xvsubdp vec10, vec10, vec10 # set to zero + +LOOP 1 + +lxvx vec0, STR0, r7 +lxvx vec10, STR1, r7 + +xsmaddadp vec0, vec1, vec10 # uses both loaded values and a constant +xsmaddadp vec0, vec3, vec4 # has to wait for vec0 from previous FMA, has to wait for vec0 from previous iteration's mul +xsmuldp vec4, vec0, vec5 # has to wait for vec0 from previous FMA +stxvx vec4, STR1, r7 + +addi r7, r7, 8 diff --git a/bench/power/lfd_stfd_lat.ptt b/bench/power/lfd_stfd_lat.ptt new file mode 100644 index 000000000..e5f53cb10 --- /dev/null +++ b/bench/power/lfd_stfd_lat.ptt @@ -0,0 +1,40 @@ +STREAMS 1 +TYPE DOUBLE +DESC Double-precision add, only scalar operations +FLOPS 1 +BYTES 24 +LOADS 2 +STORES 1 +INSTR_LOOP 19 +INSTR_CONST 5 +UOPS 19 + +li r10, 8 # four elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register + +LOOP 8 + +lfd 1, 0(STR0) +stfd 1, 0(STR0) + +lfd 1, 0(STR0) +stfd 1, 0(STR0) + +lfd 1, 0(STR0) +stfd 1, 0(STR0) + +lfd 1, 0(STR0) 
+stfd 1, 0(STR0) + +lfd 1, 0(STR0) +stfd 1, 0(STR0) + +lfd 1, 0(STR0) +stfd 1, 0(STR0) + +lfd 1, 0(STR0) +stfd 1, 0(STR0) + +lfd 1, 0(STR0) +stfd 1, 0(STR0) diff --git a/bench/power/load_scalar4.ptt b/bench/power/load_scalar4.ptt new file mode 100644 index 000000000..ab61079a3 --- /dev/null +++ b/bench/power/load_scalar4.ptt @@ -0,0 +1,42 @@ +STREAMS 1 +TYPE DOUBLE +FLOPS 1 +BYTES 8 + + +fsub 0, 0, 0 +fsub 1, 1, 1 +fsub 2, 2, 2 +fsub 3, 3, 3 +fsub 4, 4, 4 +fsub 5, 5, 5 +fsub 6, 6, 6 +fsub 7, 7, 7 + +LOOP 8 + +lfd 10, 0(STR0) +fadd 0, 0, 10 + +lfd 11, 8(STR0) +fadd 1, 1, 11 + +lfd 12, 16(STR0) +fadd 2, 2, 12 + +lfd 13, 24(STR0) +fadd 3, 3, 13 + +lfd 14, 32(STR0) +fadd 4, 4, 14 + +lfd 15, 40(STR0) +fadd 5, 5, 15 + +lfd 16, 48(STR0) +fadd 6, 6, 16 + +lfd 17, 56(STR0) +fadd 7, 7, 17 + +addi STR0, STR0, 64 diff --git a/bench/power/load_vsx1.ptt b/bench/power/load_vsx1.ptt new file mode 100644 index 000000000..311bca9ff --- /dev/null +++ b/bench/power/load_vsx1.ptt @@ -0,0 +1,17 @@ +STREAMS 1 +TYPE DOUBLE +FLOPS 1 +BYTES 8 + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +xvsubdp 0, 0, 0 # set to zero + +LOOP 2 + +lxvd2x 10, STR0, r7 +xvadddp 0, 0, 10 +addi STR0, STR0, 16 diff --git a/bench/power/load_vsx4.ptt b/bench/power/load_vsx4.ptt new file mode 100644 index 000000000..402045487 --- /dev/null +++ b/bench/power/load_vsx4.ptt @@ -0,0 +1,30 @@ +STREAMS 1 +TYPE DOUBLE +FLOPS 1 +BYTES 8 + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + 
+xvsubdp 0, 0, 0 # set to zero +xvsubdp 1, 1, 1 # set to zero +xvsubdp 2, 2, 2 # set to zero +xvsubdp 3, 3, 3 # set to zero +xvsubdp 4, 4, 4 # set to zero +xvsubdp 5, 5, 5 # set to zero +xvsubdp 6, 6, 6 # set to zero +xvsubdp 7, 7, 7 # set to zero + +LOOP 8 + +lxvd2x 10, STR0, r7 +xvadddp 0, 0, 10 +lxvd2x 11, STR0, r8 +xvadddp 1, 1, 11 +lxvd2x 12, STR0, r9 +xvadddp 2, 2, 12 +lxvd2x 13, STR0, r10 +xvadddp 3, 3, 13 +addi STR0, STR0, 64 diff --git a/bench/power/schoenauer_triad_scalar4.ptt b/bench/power/schoenauer_triad_scalar4.ptt new file mode 100644 index 000000000..550f1a6d7 --- /dev/null +++ b/bench/power/schoenauer_triad_scalar4.ptt @@ -0,0 +1,39 @@ +STREAMS 4 +TYPE DOUBLE +FLOPS 2 +BYTES 32 + +li r10, 4 # four elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register + +LOOP 4 + +lfd 0, 0(STR0) +lfd 1, 0(STR1) +lfd 2, 0(STR2) +fmadd 0, 1, 2, 0 +stfd 0, 0(STR3) + +lfd 3, 8(STR0) +lfd 4, 8(STR1) +lfd 5, 8(STR2) +fmadd 3, 4, 5, 3 +stfd 3, 8(STR3) + +lfd 6, 16(STR0) +lfd 7, 16(STR1) +lfd 8, 16(STR2) +fmadd 6, 7, 8, 6 +stfd 6, 16(STR3) + +lfd 9, 24(STR0) +lfd 10, 24(STR1) +lfd 11, 24(STR2) +fmadd 9, 10, 11, 9 +stfd 9, 24(STR3) + +addi STR0, STR0, 32 +addi STR1, STR1, 32 +addi STR2, STR2, 32 +addi STR3, STR3, 32 diff --git a/bench/power/schoenauer_triad_vsx4.ptt b/bench/power/schoenauer_triad_vsx4.ptt new file mode 100644 index 000000000..c3fb99ee4 --- /dev/null +++ b/bench/power/schoenauer_triad_vsx4.ptt @@ -0,0 +1,43 @@ +STREAMS 4 +TYPE DOUBLE +FLOPS 2 +BYTES 32 + +li r10, 8 # eight (4x2) elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register +li r8, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r9, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r10, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li 11, 48 # load 
immediate value of 48 into r10 (used as offset in addressing) + +LOOP 8 + +lxvd2x 0, STR0, r8 +lxvd2x 1, STR1, r8 +lxvd2x 2, STR2, r8 +xvmaddadp 0, 1, 2 +stxvd2x 0, STR3, r8 + +lxvd2x 3, STR0, r9 +lxvd2x 4, STR1, r9 +lxvd2x 5, STR2, r9 +xvmaddadp 3, 4, 5 +stxvd2x 3, STR3, r9 + +lxvd2x 6, STR0, r10 +lxvd2x 7, STR1, r10 +lxvd2x 8, STR2, r10 +xvmaddadp 6, 7, 8 +stxvd2x 6, STR3, r10 + +lxvd2x 9, STR0, 11 +lxvd2x 10, STR1, 11 +lxvd2x 11, STR2, 11 +xvmaddadp 9, 10, 11 +stxvd2x 9, STR3, 11 + +addi STR0, STR0, 64 +addi STR1, STR1, 64 +addi STR2, STR2, 64 +addi STR3, STR3, 64 diff --git a/bench/power/store_scalar4.ptt b/bench/power/store_scalar4.ptt new file mode 100644 index 000000000..0ed749a6d --- /dev/null +++ b/bench/power/store_scalar4.ptt @@ -0,0 +1,18 @@ +STREAMS 1 +TYPE DOUBLE +FLOPS 0 +BYTES 8 + +fsub x0, x0, x0 # zero +fsub x1, x1, x1 # zero +fsub x2, x2, x2 # zero +fsub x3, x3, x3 # zero + +LOOP 4 + +stfd x0, 0(STR0) +stfd x1, 8(STR0) +stfd x2, 16(STR0) +stfd x3, 24(STR0) + +addi STR0, STR0, 32 diff --git a/bench/power/store_vsx4.ptt b/bench/power/store_vsx4.ptt new file mode 100644 index 000000000..6f0786bf7 --- /dev/null +++ b/bench/power/store_vsx4.ptt @@ -0,0 +1,23 @@ +STREAMS 1 +TYPE DOUBLE +FLOPS 0 +BYTES 8 + +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +xvsubdp 0, 0, 0 # set to zero +xvsubdp 1, 1, 1 # set to zero +xvsubdp 2, 2, 2 # set to zero +xvsubdp 3, 3, 3 # set to zero + +LOOP 8 + +stxvd2x 0, STR0, r7 +stxvd2x 1, STR0, r8 +stxvd2x 2, STR0, r9 +stxvd2x 3, STR0, r10 + +addi STR0, STR0, 64 diff --git a/bench/power/stream.ptt b/bench/power/stream.ptt new file mode 100644 index 000000000..842166aab --- /dev/null +++ b/bench/power/stream.ptt @@ -0,0 +1,12 @@ +STREAMS 3 +TYPE DOUBLE +FLOPS 
2 +BYTES 24 +ld GPR10, SCALAR@got(2) +lxvd2x FPR2, 0, GPR10 +LOOP 2 +lxvd2x FPR1, GPR9, STR0 +lxvd2x FPR3, GPR9, STR1 +xvmaddmdp FPR1, FPR2, FPR3 +stxvd2x FPR1, GPR9, STR2 + diff --git a/bench/power/stream_triad_scalar4.ptt b/bench/power/stream_triad_scalar4.ptt new file mode 100644 index 000000000..91ecf48a3 --- /dev/null +++ b/bench/power/stream_triad_scalar4.ptt @@ -0,0 +1,38 @@ +STREAMS 4 +TYPE DOUBLE +FLOPS 2 +BYTES 24 + +li r10, 4 # four elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register +fsub 2, 2, 2 # zero +fsub 5, 5, 5 # zero +fsub 8, 8, 8 # zero +fsub 11, 11, 11 # zero + +LOOP 4 + +lfd 0, 0(STR0) +lfd 1, 0(STR1) +fmadd 0, 1, 2, 0 +stfd 0, 0(STR2) + +lfd 3, 8(STR0) +lfd 4, 8(STR1) +fmadd 3, 4, 5, 3 +stfd 3, 8(STR2) + +lfd 6, 16(STR0) +lfd 7, 16(STR1) +fmadd 6, 7, 8, 6 +stfd 6, 16(STR2) + +lfd 9, 24(STR0) +lfd 10, 24(STR1) +fmadd 9, 10, 11, 9 +stfd 9, 24(STR2) + +addi STR0, STR0, 32 +addi STR1, STR1, 32 +addi STR2, STR2, 32 diff --git a/bench/power/stream_triad_vsx4.ptt b/bench/power/stream_triad_vsx4.ptt new file mode 100644 index 000000000..4d1b182c8 --- /dev/null +++ b/bench/power/stream_triad_vsx4.ptt @@ -0,0 +1,49 @@ +STREAMS 4 +TYPE DOUBLE +FLOPS 2 +BYTES 24 +LOADS 2 +STORES 1 +INSTR_LOOP 19 +INSTR_CONST 13 +UOPS 19 + +li r10, 8 # eight (4x2) elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register +li r8, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r9, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r10, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li 11, 48 # load immediate value of 48 into r10 (used as offset in addressing) + + +xvsubdp 2, 2, 2 # set to zero +xvsubdp 5, 5, 5 # set to zero +xvsubdp 8, 8, 8 # set to zero +xvsubdp 11, 11, 11 # set to zero + +LOOP 8 + +lxvd2x 0, STR0, r8 
+lxvd2x 1, STR1, r8 +xvmaddadp 0, 1, 2 +stxvd2x 0, STR2, r8 + +lxvd2x 3, STR0, r9 +lxvd2x 4, STR1, r9 +xvmaddadp 3, 4, 5 +stxvd2x 3, STR2, r9 + +lxvd2x 6, STR0, r10 +lxvd2x 7, STR1, r10 +xvmaddadp 6, 7, 8 +stxvd2x 6, STR2, r10 + +lxvd2x 9, STR0, 11 +lxvd2x 10, STR1, 11 +xvmaddadp 9, 10, 11 +stxvd2x 9, STR2, 11 + +addi STR0, STR0, 64 +addi STR1, STR1, 64 +addi STR2, STR2, 64 diff --git a/bench/power/update_vsx4.ptt b/bench/power/update_vsx4.ptt new file mode 100644 index 000000000..b28ee9112 --- /dev/null +++ b/bench/power/update_vsx4.ptt @@ -0,0 +1,34 @@ +STREAMS 1 +TYPE DOUBLE +FLOPS 0 +BYTES 16 +INSTR_LOOP 3 +INSTR_CONST 1 +UOPS 4 +LOADS 1 +STORES 1 + +li r10, 8 # two elements per loop iteration +divd r10, r3, r10 # r3 holds loop limit, divide by elements per loop iteration +mtctr r10 # move to count register +li r7, 0 # load immediate value of 0 into r7 (used as offset in addressing) +li r8, 16 # load immediate value of 16 into r8 (used as offset in addressing) +li r9, 32 # load immediate value of 32 into r9 (used as offset in addressing) +li r10, 48 # load immediate value of 48 into r10 (used as offset in addressing) + +LOOP 8 +lxvd2x x1, STR0, r7 +stxvd2x x1, STR0, r7 +addi r7, r7, 64 + +lxvd2x x1, STR0, r8 +stxvd2x x1, STR0, r8 +addi r8, r8, 64 + +lxvd2x x1, STR0, r9 +stxvd2x x1, STR0, r9 +addi r9, r9, 64 + +lxvd2x x1, STR0, r10 +stxvd2x x1, STR0, r10 +addi r10, r10, 64 diff --git a/bench/src/allocator.c b/bench/src/allocator.c index 3c3775585..8bec7ebc4 100644 --- a/bench/src/allocator.c +++ b/bench/src/allocator.c @@ -96,13 +96,15 @@ allocator_allocateVector( int offset, DataType type, int stride, - bstring domainString) + bstring domainString, + int init_per_thread) { int i; size_t bytesize = 0; const AffinityDomain* domain = NULL; int errorCode; int elements = 0; + affinity_init(); size_t typesize = allocator_dataTypeLength(type); bytesize = (size+offset) * typesize; @@ -161,48 +163,50 @@ allocator_allocateVector( offset, LLU_CAST elements); - switch ( 
type ) + if (!init_per_thread) { - case INT: - { - int* sptr = (int*) (*ptr); - sptr += offset; - - for ( uint64_t i=0; i < size; i++ ) + switch ( type ) + { + case INT: { - sptr[i] = 1; - } - *ptr = (void*) sptr; + int* sptr = (int*) (*ptr); + sptr += offset; - } - break; + for ( uint64_t i=0; i < size; i++ ) + { + sptr[i] = 1; + } + *ptr = (void*) sptr; - case SINGLE: - { - float* sptr = (float*) (*ptr); - sptr += offset; + } + break; - for ( uint64_t i=0; i < size; i++ ) + case SINGLE: { - sptr[i] = 1.0; - } - *ptr = (void*) sptr; + float* sptr = (float*) (*ptr); + sptr += offset; - } - break; + for ( uint64_t i=0; i < size; i++ ) + { + sptr[i] = 1.0; + } + *ptr = (void*) sptr; - case DOUBLE: - { - double* dptr = (double*) (*ptr); - dptr += offset; + } + break; - for ( uint64_t i=0; i < size; i++ ) + case DOUBLE: { - dptr[i] = 1.0; + double* dptr = (double*) (*ptr); + dptr += offset; + + for ( uint64_t i=0; i < size; i++ ) + { + dptr[i] = 1.0; + } + *ptr = (void*) dptr; } - *ptr = (void*) dptr; - } - break; + break; + } } } - diff --git a/bench/src/barrier.c b/bench/src/barrier.c index 018c9d1c6..bcda23a25 100644 --- a/bench/src/barrier.c +++ b/bench/src/barrier.c @@ -141,9 +141,14 @@ barrier_synchronize(BarrierData* barr) { #if defined(__arm__) || defined(__ARM_ARCH_8A) __asm__ ("nop"); -#else +#endif +#if defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(__x86_64) __asm__ ("pause"); #endif +#ifdef _ARCH_PPC + __asm__ ("nop"); /* PowerPC build: spin with a plain no-op */ +#endif + } } diff --git a/bench/src/bench.c b/bench/src/bench.c index 23bb4d512..438cbcc90 100644 --- a/bench/src/bench.c +++ b/bench/src/bench.c @@ -42,13 +42,15 @@ #include #include #include -#include +//#include +#include /* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */ #define BARRIER barrier_synchronize(&barr) #define EXECUTE(func) \ + LIKWID_MARKER_REGISTER("bench"); \ BARRIER; \ LIKWID_MARKER_START("bench"); \ timer_start(&time); \ @@ -73,6 +75,7 @@ runTest(void* 
arg) size_t size; size_t vecsize; size_t i; + size_t j = 0; BarrierData barr; ThreadData* data; ThreadUserData* myData; @@ -88,7 +91,7 @@ runTest(void* arg) /* Prepare ptrs for thread */ vecsize = myData->size / data->numberOfThreads; size = myData->size / data->numberOfThreads; - + size -= (size % myData->test->stride); myData->size = size; offset = data->threadId * size; @@ -96,6 +99,17 @@ runTest(void* arg) if (size != vecsize && data->threadId == 0) printf("Sanitizing vector length to a multiple of the loop stride from %d elements (%d bytes) to %d elements (%d bytes)\n", vecsize, vecsize*myData->test->bytes, size, size*myData->test->bytes); + /* pin the thread */ + likwid_pinThread(myData->processors[threadId]); + printf("Group: %d Thread %d Global Thread %d running on core %d - Vector length %llu Offset %zd\n", + data->groupId, + threadId, + data->globalThreadId, + affinity_threadGetProcessorId(), + LLU_CAST size, + offset); + BARRIER; + switch ( myData->test->type ) { case SINGLE: @@ -105,6 +119,13 @@ runTest(void* arg) { sptr = (float*) myData->streams[i]; sptr += offset; + if (myData->init_per_thread) + { + for (j = 0; j < vecsize; j++) + { + sptr[j] = 1.0; + } + } myData->streams[i] = (float*) sptr; } } @@ -116,6 +137,13 @@ runTest(void* arg) { sptr = (int*) myData->streams[i]; sptr += offset; + if (myData->init_per_thread) + { + for (j = 0; j < vecsize; j++) + { + sptr[j] = 1; + } + } myData->streams[i] = (int*) sptr; } } @@ -127,21 +155,19 @@ runTest(void* arg) { dptr = (double*) myData->streams[i]; dptr += offset; + if (myData->init_per_thread) + { + for (j = 0; j < vecsize; j++) + { + dptr[j] = 1.0; + } + } myData->streams[i] = (double*) dptr; } } break; } - /* pin the thread */ - likwid_pinThread(myData->processors[threadId]); - printf("Group: %d Thread %d Global Thread %d running on core %d - Vector length %llu Offset %zd\n", - data->groupId, - threadId, - data->globalThreadId, - affinity_threadGetProcessorId(), - LLU_CAST size, - offset); BARRIER; 
/* Up to 10 streams the following registers are used for Array ptr: @@ -473,7 +499,7 @@ getIterSingle(void* arg) //size = myData->size - (myData->size % myData->test->stride); vecsize = myData->size; size = myData->size / data->numberOfThreads; - + size -= (size % myData->test->stride); offset = data->threadId * size; @@ -771,4 +797,3 @@ getIterSingle(void* arg) #endif return NULL; } - diff --git a/bench/src/bstrlib.c b/bench/src/bstrlib.c deleted file mode 100644 index 380269cd2..000000000 --- a/bench/src/bstrlib.c +++ /dev/null @@ -1,2955 +0,0 @@ -/* - * ======================================================================================= - * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2008, and is covered by the BSD open source - * license and the GPL. Refer to the accompanying documentation for details - * on usage and license. - */ -/* - * bstrlib.c - * - * This file is the core module for implementing the bstring functions. - */ - -#include -#include -#include -#include -#include -#include -#include "bstrlib.h" - -/* Optionally include a mechanism for debugging memory */ - -#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG) -#include "memdbg.h" -#endif - -#ifndef bstr__alloc -#define bstr__alloc(x) malloc (x) -#endif - -#ifndef bstr__free -#define bstr__free(p) free (p) -#endif - -#ifndef bstr__realloc -#define bstr__realloc(p,x) realloc ((p), (x)) -#endif - -#ifndef bstr__memcpy -#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l)) -#endif - -#ifndef bstr__memmove -#define bstr__memmove(d,s,l) memmove ((d), (s), (l)) -#endif - -#ifndef bstr__memset -#define bstr__memset(d,c,l) memset ((d), (c), (l)) -#endif - -#ifndef bstr__memcmp -#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l)) -#endif - -#ifndef bstr__memchr -#define bstr__memchr(s,c,l) memchr ((s), (c), (l)) -#endif - -/* Just a length safe wrapper for memmove. 
*/ - -#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); } - -/* Compute the snapped size for a given requested size. By snapping to powers - of 2 like this, repeated reallocations are avoided. */ -static int snapUpSize (int i) { - if (i < 8) { - i = 8; - } else { - unsigned int j; - j = (unsigned int) i; - - j |= (j >> 1); - j |= (j >> 2); - j |= (j >> 4); - j |= (j >> 8); /* Ok, since int >= 16 bits */ -#if (UINT_MAX != 0xffff) - j |= (j >> 16); /* For 32 bit int systems */ -#if (UINT_MAX > 0xffffffffUL) - j |= (j >> 32); /* For 64 bit int systems */ -#endif -#endif - /* Least power of two greater than i */ - j++; - if ((int) j >= i) i = (int) j; - } - return i; -} - -/* int balloc (bstring b, int len) - * - * Increase the size of the memory backing the bstring b to at least len. - */ -int balloc (bstring b, int olen) { - int len; - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || - b->mlen < b->slen || olen <= 0) { - return BSTR_ERR; - } - - if (olen >= b->mlen) { - unsigned char * x; - - if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK; - - /* Assume probability of a non-moving realloc is 0.125 */ - if (7 * b->mlen < 8 * b->slen) { - - /* If slen is close to mlen in size then use realloc to reduce - the memory defragmentation */ - - reallocStrategy:; - - x = (unsigned char *) bstr__realloc (b->data, (size_t) len); - if (x == NULL) { - - /* Since we failed, try allocating the tighest possible - allocation */ - - if (NULL == (x = (unsigned char *) bstr__realloc (b->data, (size_t) (len = olen)))) { - return BSTR_ERR; - } - } - } else { - - /* If slen is not close to mlen then avoid the penalty of copying - the extra bytes that are allocated, but not considered part of - the string */ - - if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) { - - /* Perhaps there is no available memory for the two - allocations to be in memory at once */ - - goto reallocStrategy; - - } else { - if (b->slen) bstr__memcpy ((char 
*) x, (char *) b->data, (size_t) b->slen); - bstr__free (b->data); - } - } - b->data = x; - b->mlen = len; - b->data[b->slen] = (unsigned char) '\0'; - } - - return BSTR_OK; -} - -/* int ballocmin (bstring b, int len) - * - * Set the size of the memory backing the bstring b to len or b->slen+1, - * whichever is larger. Note that repeated use of this function can degrade - * performance. - */ -int ballocmin (bstring b, int len) { - unsigned char * s; - - if (b == NULL || b->data == NULL || (b->slen+1) < 0 || b->mlen <= 0 || - b->mlen < b->slen || len <= 0) { - return BSTR_ERR; - } - - if (len < b->slen + 1) len = b->slen + 1; - - if (len != b->mlen) { - s = (unsigned char *) bstr__realloc (b->data, (size_t) len); - if (NULL == s) return BSTR_ERR; - s[b->slen] = (unsigned char) '\0'; - b->data = s; - b->mlen = len; - } - - return BSTR_OK; -} - -/* bstring bfromcstr (const char * str) - * - * Create a bstring which contains the contents of the '\0' terminated char * - * buffer str. - */ -bstring bfromcstr (const char * str) { -bstring b; -int i; -size_t j; - - if (str == NULL) return NULL; - j = (strlen) (str); - i = snapUpSize ((int) (j + (2 - (j != 0)))); - if (i <= (int) j) return NULL; - - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (NULL == b) return NULL; - b->slen = (int) j; - if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { - bstr__free (b); - return NULL; - } - - bstr__memcpy (b->data, str, j+1); - return b; -} - -/* bstring bfromcstralloc (int mlen, const char * str) - * - * Create a bstring which contains the contents of the '\0' terminated char * - * buffer str. The memory buffer backing the string is at least len - * characters in length. 
- */ -bstring bfromcstralloc (int mlen, const char * str) { -bstring b; -int i; -size_t j; - - if (str == NULL) return NULL; - j = (strlen) (str); - i = snapUpSize ((int) (j + (2 - (j != 0)))); - if (i <= (int) j) return NULL; - - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (b == NULL) return NULL; - b->slen = (int) j; - if (i < mlen) i = mlen; - - if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { - bstr__free (b); - return NULL; - } - - bstr__memcpy (b->data, str, j+1); - return b; -} - -/* bstring blk2bstr (const void * blk, int len) - * - * Create a bstring which contains the content of the block blk of length - * len. - */ -bstring blk2bstr (const void * blk, int len) { -bstring b; -int i; - - if (blk == NULL || len < 0) return NULL; - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (b == NULL) return NULL; - b->slen = len; - - i = len + (2 - (len != 0)); - i = snapUpSize (i); - - b->mlen = i; - - b->data = (unsigned char *) bstr__alloc ((size_t) b->mlen); - if (b->data == NULL) { - bstr__free (b); - return NULL; - } - - if (len > 0) bstr__memcpy (b->data, blk, (size_t) len); - b->data[len] = (unsigned char) '\0'; - - return b; -} - -/* char * bstr2cstr (const_bstring s, char z) - * - * Create a '\0' terminated char * buffer which is equal to the contents of - * the bstring s, except that any contained '\0' characters are converted - * to the character in z. This returned value should be freed with a - * bcstrfree () call, by the calling application. - */ -char * bstr2cstr (const_bstring b, char z) { -int i, l; -char * r; - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - l = b->slen; - r = (char *) bstr__alloc ((size_t) (l + 1)); - if (r == NULL) return r; - - for (i=0; i < l; i ++) { - r[i] = (char) ((b->data[i] == '\0') ? z : (char) (b->data[i])); - } - - r[l] = (unsigned char) '\0'; - - return r; -} - -/* int bcstrfree (char * s) - * - * Frees a C-string generated by bstr2cstr (). 
This is normally unnecessary - * since it just wraps a call to bstr__free (), however, if bstr__alloc () - * and bstr__free () have been redefined as a macros within the bstrlib - * module (via defining them in memdbg.h after defining - * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std - * library functions, then this allows a correct way of freeing the memory - * that allows higher level code to be independent from these macro - * redefinitions. - */ -int bcstrfree (char * s) { - if (s) { - bstr__free (s); - return BSTR_OK; - } - return BSTR_ERR; -} - -/* int bconcat (bstring b0, const_bstring b1) - * - * Concatenate the bstring b1 to the bstring b0. - */ -int bconcat (bstring b0, const_bstring b1) { -int len, d; -bstring aux = (bstring) b1; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR; - - d = b0->slen; - len = b1->slen; - if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; - - if (b0->mlen <= d + len + 1) { - ptrdiff_t pd = b1->data - b0->data; - if (0 <= pd && pd < b0->mlen) { - if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; - } - if (balloc (b0, d + len + 1) != BSTR_OK) { - if (aux != b1) bdestroy (aux); - return BSTR_ERR; - } - } - - bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len); - b0->data[d + len] = (unsigned char) '\0'; - b0->slen = d + len; - if (aux != b1) bdestroy (aux); - return BSTR_OK; -} - -/* int bconchar (bstring b, char c) -/ * - * Concatenate the single character c to the bstring b. - */ -int bconchar (bstring b, char c) { -int d; - - if (b == NULL) return BSTR_ERR; - d = b->slen; - if ((d | (b->mlen - d)) < 0 || balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; - b->data[d] = (unsigned char) c; - b->data[d + 1] = (unsigned char) '\0'; - b->slen++; - return BSTR_OK; -} - -/* int bcatcstr (bstring b, const char * s) - * - * Concatenate a char * string to a bstring. 
- */ -int bcatcstr (bstring b, const char * s) { -char * d; -int i, l; - - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen - || b->mlen <= 0 || s == NULL) return BSTR_ERR; - - /* Optimistically concatenate directly */ - l = b->mlen - b->slen; - d = (char *) &b->data[b->slen]; - for (i=0; i < l; i++) { - if ((*d++ = *s++) == '\0') { - b->slen += i; - return BSTR_OK; - } - } - b->slen += i; - - /* Need to explicitely resize and concatenate tail */ - return bcatblk (b, (const void *) s, (int) strlen (s)); -} - -/* int bcatblk (bstring b, const void * s, int len) - * - * Concatenate a fixed length buffer to a bstring. - */ -int bcatblk (bstring b, const void * s, int len) { -int nl; - - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen - || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR; - - if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */ - if (b->mlen <= nl && 0 > balloc (b, nl + 1)) return BSTR_ERR; - - bBlockCopy (&b->data[b->slen], s, (size_t) len); - b->slen = nl; - b->data[nl] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* bstring bstrcpy (const_bstring b) - * - * Create a copy of the bstring b. - */ -bstring bstrcpy (const_bstring b) { -bstring b0; -int i,j; - - /* Attempted to copy an invalid string? 
*/ - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - - b0 = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (b0 == NULL) { - /* Unable to allocate memory for string header */ - return NULL; - } - - i = b->slen; - j = snapUpSize (i + 1); - - b0->data = (unsigned char *) bstr__alloc (j); - if (b0->data == NULL) { - j = i + 1; - b0->data = (unsigned char *) bstr__alloc (j); - if (b0->data == NULL) { - /* Unable to allocate memory for string data */ - bstr__free (b0); - return NULL; - } - } - - b0->mlen = j; - b0->slen = i; - - if (i) bstr__memcpy ((char *) b0->data, (char *) b->data, i); - b0->data[b0->slen] = (unsigned char) '\0'; - - return b0; -} - -/* int bassign (bstring a, const_bstring b) - * - * Overwrite the string a with the contents of string b. - */ -int bassign (bstring a, const_bstring b) { - if (b == NULL || b->data == NULL || b->slen < 0) - return BSTR_ERR; - if (b->slen != 0) { - if (balloc (a, b->slen) != BSTR_OK) return BSTR_ERR; - bstr__memmove (a->data, b->data, b->slen); - } else { - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0) - return BSTR_ERR; - } - a->data[b->slen] = (unsigned char) '\0'; - a->slen = b->slen; - return BSTR_OK; -} - -/* int bassignmidstr (bstring a, const_bstring b, int left, int len) - * - * Overwrite the string a with the middle of contents of string b - * starting from position left and running for a length len. left and - * len are clamped to the ends of b as with the function bmidstr. 
- */ -int bassignmidstr (bstring a, const_bstring b, int left, int len) { - if (b == NULL || b->data == NULL || b->slen < 0) - return BSTR_ERR; - - if (left < 0) { - len += left; - left = 0; - } - - if (len > b->slen - left) len = b->slen - left; - - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0) - return BSTR_ERR; - - if (len > 0) { - if (balloc (a, len) != BSTR_OK) return BSTR_ERR; - bstr__memmove (a->data, b->data + left, len); - a->slen = len; - } else { - a->slen = 0; - } - a->data[a->slen] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* int bassigncstr (bstring a, const char * str) - * - * Overwrite the string a with the contents of char * string str. Note that - * the bstring a must be a well defined and writable bstring. If an error - * occurs BSTR_ERR is returned however a may be partially overwritten. - */ -int bassigncstr (bstring a, const char * str) { -int i; -size_t len; - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0 || NULL == str) - return BSTR_ERR; - - for (i=0; i < a->mlen; i++) { - if ('\0' == (a->data[i] = str[i])) { - a->slen = i; - return BSTR_OK; - } - } - - a->slen = i; - len = strlen (str + i); - if (len > INT_MAX || i + len + 1 > INT_MAX || - 0 > balloc (a, (int) (i + len + 1))) return BSTR_ERR; - bBlockCopy (a->data + i, str + i, (size_t) len + 1); - a->slen += (int) len; - return BSTR_OK; -} - -/* int bassignblk (bstring a, const void * s, int len) - * - * Overwrite the string a with the contents of the block (s, len). Note that - * the bstring a must be a well defined and writable bstring. If an error - * occurs BSTR_ERR is returned and a is not overwritten. 
- */ -int bassignblk (bstring a, const void * s, int len) { - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0 || NULL == s || len + 1 < 1) - return BSTR_ERR; - if (len + 1 > a->mlen && 0 > balloc (a, len + 1)) return BSTR_ERR; - bBlockCopy (a->data, s, (size_t) len); - a->data[len] = (unsigned char) '\0'; - a->slen = len; - return BSTR_OK; -} - -/* int btrunc (bstring b, int n) - * - * Truncate the bstring to at most n characters. - */ -int btrunc (bstring b, int n) { - if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - if (b->slen > n) { - b->slen = n; - b->data[n] = (unsigned char) '\0'; - } - return BSTR_OK; -} - -#define upcase(c) (toupper ((unsigned char) c)) -#define downcase(c) (tolower ((unsigned char) c)) -#define wspace(c) (isspace ((unsigned char) c)) - -/* int btoupper (bstring b) - * - * Convert contents of bstring to upper case. - */ -int btoupper (bstring b) { -int i, len; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i=0, len = b->slen; i < len; i++) { - b->data[i] = (unsigned char) upcase (b->data[i]); - } - return BSTR_OK; -} - -/* int btolower (bstring b) - * - * Convert contents of bstring to lower case. - */ -int btolower (bstring b) { -int i, len; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i=0, len = b->slen; i < len; i++) { - b->data[i] = (unsigned char) downcase (b->data[i]); - } - return BSTR_OK; -} - -/* int bstricmp (const_bstring b0, const_bstring b1) - * - * Compare two strings without differentiating between case. The return - * value is the difference of the values of the characters where the two - * strings first differ after lower case transformation, otherwise 0 is - * returned indicating that the strings are equal. 
If the lengths are - * different, then a difference from 0 is given, but if the first extra - * character is '\0', then it is taken to be the value UCHAR_MAX+1. - */ -int bstricmp (const_bstring b0, const_bstring b1) { -int i, v, n; - - if (bdata (b0) == NULL || b0->slen < 0 || - bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN; - if ((n = b0->slen) > b1->slen) n = b1->slen; - else if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK; - - for (i = 0; i < n; i ++) { - v = (char) downcase (b0->data[i]) - - (char) downcase (b1->data[i]); - if (0 != v) return v; - } - - if (b0->slen > n) { - v = (char) downcase (b0->data[n]); - if (v) return v; - return UCHAR_MAX + 1; - } - if (b1->slen > n) { - v = - (char) downcase (b1->data[n]); - if (v) return v; - return - (int) (UCHAR_MAX + 1); - } - return BSTR_OK; -} - -/* int bstrnicmp (const_bstring b0, const_bstring b1, int n) - * - * Compare two strings without differentiating between case for at most n - * characters. If the position where the two strings first differ is - * before the nth position, the return value is the difference of the values - * of the characters, otherwise 0 is returned. If the lengths are different - * and less than n characters, then a difference from 0 is given, but if the - * first extra character is '\0', then it is taken to be the value - * UCHAR_MAX+1. 
- */ -int bstrnicmp (const_bstring b0, const_bstring b1, int n) { -int i, v, m; - - if (bdata (b0) == NULL || b0->slen < 0 || - bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN; - m = n; - if (m > b0->slen) m = b0->slen; - if (m > b1->slen) m = b1->slen; - - if (b0->data != b1->data) { - for (i = 0; i < m; i ++) { - v = (char) downcase (b0->data[i]); - v -= (char) downcase (b1->data[i]); - if (v != 0) return b0->data[i] - b1->data[i]; - } - } - - if (n == m || b0->slen == b1->slen) return BSTR_OK; - - if (b0->slen > m) { - v = (char) downcase (b0->data[m]); - if (v) return v; - return UCHAR_MAX + 1; - } - - v = - (char) downcase (b1->data[m]); - if (v) return v; - return - (int) (UCHAR_MAX + 1); -} - -/* int biseqcaseless (const_bstring b0, const_bstring b1) - * - * Compare two strings for equality without differentiating between case. - * If the strings differ other than in case, 0 is returned, if the strings - * are the same, 1 is returned, if there is an error, -1 is returned. If - * the length of the strings are different, this function is O(1). '\0' - * termination characters are not treated in any special way. - */ -int biseqcaseless (const_bstring b0, const_bstring b1) { -int i, n; - - if (bdata (b0) == NULL || b0->slen < 0 || - bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR; - if (b0->slen != b1->slen) return BSTR_OK; - if (b0->data == b1->data || b0->slen == 0) return 1; - for (i=0, n=b0->slen; i < n; i++) { - if (b0->data[i] != b1->data[i]) { - unsigned char c = (unsigned char) downcase (b0->data[i]); - if (c != (unsigned char) downcase (b1->data[i])) return 0; - } - } - return 1; -} - -/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) - * - * Compare beginning of string b0 with a block of memory of length len - * without differentiating between case for equality. 
If the beginning of b0 - * differs from the memory block other than in case (or if b0 is too short), - * 0 is returned, if the strings are the same, 1 is returned, if there is an - * error, -1 is returned. '\0' characters are not treated in any special - * way. - */ -int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) { -int i; - - if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) - return BSTR_ERR; - if (b0->slen < len) return BSTR_OK; - if (b0->data == (const unsigned char *) blk || len == 0) return 1; - - for (i = 0; i < len; i ++) { - if (b0->data[i] != ((const unsigned char *) blk)[i]) { - if (downcase (b0->data[i]) != - downcase (((const unsigned char *) blk)[i])) return 0; - } - } - return 1; -} - -/* - * int bltrimws (bstring b) - * - * Delete whitespace contiguous from the left end of the string. - */ -int bltrimws (bstring b) { -int i, len; - - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - - for (len = b->slen, i = 0; i < len; i++) { - if (!wspace (b->data[i])) { - return bdelete (b, 0, i); - } - } - - b->data[0] = (unsigned char) '\0'; - b->slen = 0; - return BSTR_OK; -} - -/* - * int brtrimws (bstring b) - * - * Delete whitespace contiguous from the right end of the string. - */ -int brtrimws (bstring b) { -int i; - - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - - for (i = b->slen - 1; i >= 0; i--) { - if (!wspace (b->data[i])) { - if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; - b->slen = i + 1; - return BSTR_OK; - } - } - - b->data[0] = (unsigned char) '\0'; - b->slen = 0; - return BSTR_OK; -} - -/* - * int btrimws (bstring b) - * - * Delete whitespace contiguous from both ends of the string. 
- */ -int btrimws (bstring b) { -int i, j; - - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - - for (i = b->slen - 1; i >= 0; i--) { - if (!wspace (b->data[i])) { - if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; - b->slen = i + 1; - for (j = 0; wspace (b->data[j]); j++) {} - return bdelete (b, 0, j); - } - } - - b->data[0] = (unsigned char) '\0'; - b->slen = 0; - return BSTR_OK; -} - -/* int biseq (const_bstring b0, const_bstring b1) - * - * Compare the string b0 and b1. If the strings differ, 0 is returned, if - * the strings are the same, 1 is returned, if there is an error, -1 is - * returned. If the length of the strings are different, this function is - * O(1). '\0' termination characters are not treated in any special way. - */ -int biseq (const_bstring b0, const_bstring b1) { - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return BSTR_ERR; - if (b0->slen != b1->slen) return BSTR_OK; - if (b0->data == b1->data || b0->slen == 0) return 1; - return !bstr__memcmp (b0->data, b1->data, b0->slen); -} - -/* int bisstemeqblk (const_bstring b0, const void * blk, int len) - * - * Compare beginning of string b0 with a block of memory of length len for - * equality. If the beginning of b0 differs from the memory block (or if b0 - * is too short), 0 is returned, if the strings are the same, 1 is returned, - * if there is an error, -1 is returned. '\0' characters are not treated in - * any special way. 
- */ -int bisstemeqblk (const_bstring b0, const void * blk, int len) { -int i; - - if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) - return BSTR_ERR; - if (b0->slen < len) return BSTR_OK; - if (b0->data == (const unsigned char *) blk || len == 0) return 1; - - for (i = 0; i < len; i ++) { - if (b0->data[i] != ((const unsigned char *) blk)[i]) return BSTR_OK; - } - return 1; -} - -/* int biseqcstr (const_bstring b, const char *s) - * - * Compare the bstring b and char * string s. The C string s must be '\0' - * terminated at exactly the length of the bstring b, and the contents - * between the two must be identical with the bstring b with no '\0' - * characters for the two contents to be considered equal. This is - * equivalent to the condition that their current contents will be always be - * equal when comparing them in the same format after converting one or the - * other. If the strings are equal 1 is returned, if they are unequal 0 is - * returned and if there is a detectable error BSTR_ERR is returned. - */ -int biseqcstr (const_bstring b, const char * s) { -int i; - if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; - for (i=0; i < b->slen; i++) { - if (s[i] == '\0' || b->data[i] != (unsigned char) s[i]) return BSTR_OK; - } - return s[i] == '\0'; -} - -/* int biseqcstrcaseless (const_bstring b, const char *s) - * - * Compare the bstring b and char * string s. The C string s must be '\0' - * terminated at exactly the length of the bstring b, and the contents - * between the two must be identical except for case with the bstring b with - * no '\0' characters for the two contents to be considered equal. This is - * equivalent to the condition that their current contents will be always be - * equal ignoring case when comparing them in the same format after - * converting one or the other. 
If the strings are equal, except for case, - * 1 is returned, if they are unequal regardless of case 0 is returned and - * if there is a detectable error BSTR_ERR is returned. - */ -int biseqcstrcaseless (const_bstring b, const char * s) { -int i; - if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; - for (i=0; i < b->slen; i++) { - if (s[i] == '\0' || - (b->data[i] != (unsigned char) s[i] && - downcase (b->data[i]) != (unsigned char) downcase (s[i]))) - return BSTR_OK; - } - return s[i] == '\0'; -} - -/* int bstrcmp (const_bstring b0, const_bstring b1) - * - * Compare the string b0 and b1. If there is an error, SHRT_MIN is returned, - * otherwise a value less than or greater than zero, indicating that the - * string pointed to by b0 is lexicographically less than or greater than - * the string pointed to by b1 is returned. If the the string lengths are - * unequal but the characters up until the length of the shorter are equal - * then a value less than, or greater than zero, indicating that the string - * pointed to by b0 is shorter or longer than the string pointed to by b1 is - * returned. 0 is returned if and only if the two strings are the same. If - * the length of the strings are different, this function is O(n). Like its - * standard C library counter part strcmp, the comparison does not proceed - * past any '\0' termination characters encountered. 
- */ -int bstrcmp (const_bstring b0, const_bstring b1) { -int i, v, n; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return SHRT_MIN; - n = b0->slen; if (n > b1->slen) n = b1->slen; - if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0)) - return BSTR_OK; - - for (i = 0; i < n; i ++) { - v = ((char) b0->data[i]) - ((char) b1->data[i]); - if (v != 0) return v; - if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; - } - - if (b0->slen > n) return 1; - if (b1->slen > n) return -1; - return BSTR_OK; -} - -/* int bstrncmp (const_bstring b0, const_bstring b1, int n) - * - * Compare the string b0 and b1 for at most n characters. If there is an - * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and - * b1 were first truncated to at most n characters then bstrcmp was called - * with these new strings are paremeters. If the length of the strings are - * different, this function is O(n). Like its standard C library counter - * part strcmp, the comparison does not proceed past any '\0' termination - * characters encountered. - */ -int bstrncmp (const_bstring b0, const_bstring b1, int n) { -int i, v, m; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return SHRT_MIN; - m = n; - if (m > b0->slen) m = b0->slen; - if (m > b1->slen) m = b1->slen; - - if (b0->data != b1->data) { - for (i = 0; i < m; i ++) { - v = ((char) b0->data[i]) - ((char) b1->data[i]); - if (v != 0) return v; - if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; - } - } - - if (n == m || b0->slen == b1->slen) return BSTR_OK; - - if (b0->slen > m) return 1; - return -1; -} - -/* bstring bmidstr (const_bstring b, int left, int len) - * - * Create a bstring which is the substring of b starting from position left - * and running for a length len (clamped by the end of the bstring b.) If - * b is detectably invalid, then NULL is returned. 
The section described - * by (left, len) is clamped to the boundaries of b. - */ -bstring bmidstr (const_bstring b, int left, int len) { - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - - if (left < 0) { - len += left; - left = 0; - } - - if (len > b->slen - left) len = b->slen - left; - - if (len <= 0) return bfromcstr (""); - return blk2bstr (b->data + left, len); -} - -/* int bdelete (bstring b, int pos, int len) - * - * Removes characters from pos to pos+len-1 inclusive and shifts the tail of - * the bstring starting from pos+len to pos. len must be positive for this - * call to have any effect. The section of the string described by (pos, - * len) is clamped to boundaries of the bstring b. - */ -int bdelete (bstring b, int pos, int len) { - /* Clamp to left side of bstring */ - if (pos < 0) { - len += pos; - pos = 0; - } - - if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || - b->mlen < b->slen || b->mlen <= 0) - return BSTR_ERR; - if (len > 0 && pos < b->slen) { - if (pos + len >= b->slen) { - b->slen = pos; - } else { - bBlockCopy ((char *) (b->data + pos), - (char *) (b->data + pos + len), - b->slen - (pos+len)); - b->slen -= len; - } - b->data[b->slen] = (unsigned char) '\0'; - } - return BSTR_OK; -} - -/* int bdestroy (bstring b) - * - * Free up the bstring. Note that if b is detectably invalid or not writable - * then no action is performed and BSTR_ERR is returned. Like a freed memory - * allocation, dereferences, writes or any other action on b after it has - * been bdestroyed is undefined. - */ -int bdestroy (bstring b) { - if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen || - b->data == NULL) - return BSTR_ERR; - - bstr__free (b->data); - - /* In case there is any stale usage, there is one more chance to - notice this error. 
*/ - - b->slen = -1; - b->mlen = -__LINE__; - b->data = NULL; - - bstr__free (b); - return BSTR_OK; -} - -/* int binstr (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * forward. If it is found then return with the first position where it is - * found, otherwise return BSTR_ERR. Note that this is just a brute force - * string searcher that does not attempt clever things like the Boyer-Moore - * search algorithm. Because of this there are many degenerate cases where - * this can take much longer than it needs to. - */ -int binstr (const_bstring b1, int pos, const_bstring b2) { -int j, ii, ll, lf; -unsigned char * d0; -unsigned char c0; -register unsigned char * d1; -register unsigned char c1; -register int i; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* No space to find such a string? 
*/ - if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; - - /* An obvious alias case */ - if (b1->data == b2->data && pos == 0) return 0; - - i = pos; - - d0 = b2->data; - d1 = b1->data; - ll = b2->slen; - - /* Peel off the b2->slen == 1 case */ - c0 = d0[0]; - if (1 == ll) { - for (;i < lf; i++) if (c0 == d1[i]) return i; - return BSTR_ERR; - } - - c1 = c0; - j = 0; - lf = b1->slen - 1; - - ii = -1; - if (i < lf) do { - /* Unrolled current character test */ - if (c1 != d1[i]) { - if (c1 != d1[1+i]) { - i += 2; - continue; - } - i++; - } - - /* Take note if this is the start of a potential match */ - if (0 == j) ii = i; - - /* Shift the test character down by one */ - j++; - i++; - - /* If this isn't past the last character continue */ - if (j < ll) { - c1 = d0[j]; - continue; - } - - N0:; - - /* If no characters mismatched, then we matched */ - if (i == ii+j) return ii; - - /* Shift back to the beginning */ - i -= j; - j = 0; - c1 = c0; - } while (i < lf); - - /* Deal with last case if unrolling caused a misalignment */ - if (i == lf && ll == j+1 && c1 == d1[i]) goto N0; - - return BSTR_ERR; -} - -/* int binstrr (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * backward. If it is found then return with the first position where it is - * found, otherwise return BSTR_ERR. Note that this is just a brute force - * string searcher that does not attempt clever things like the Boyer-Moore - * search algorithm. Because of this there are many degenerate cases where - * this can take much longer than it needs to. 
- */ -int binstrr (const_bstring b1, int pos, const_bstring b2) { -int j, i, l; -unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos && b2->slen == 0) return pos; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* Obvious alias case */ - if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0; - - i = pos; - if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; - - /* If no space to find such a string then snap back */ - if (l + 1 <= i) i = l; - j = 0; - - d0 = b2->data; - d1 = b1->data; - l = b2->slen; - - for (;;) { - if (d0[j] == d1[i + j]) { - j ++; - if (j >= l) return i; - } else { - i --; - if (i < 0) break; - j=0; - } - } - - return BSTR_ERR; -} - -/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * forward but without regard to case. If it is found then return with the - * first position where it is found, otherwise return BSTR_ERR. Note that - * this is just a brute force string searcher that does not attempt clever - * things like the Boyer-Moore search algorithm. Because of this there are - * many degenerate cases where this can take much longer than it needs to. - */ -int binstrcaseless (const_bstring b1, int pos, const_bstring b2) { -int j, i, l, ll; -unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - l = b1->slen - b2->slen + 1; - - /* No space to find such a string? 
*/ - if (l <= pos) return BSTR_ERR; - - /* An obvious alias case */ - if (b1->data == b2->data && pos == 0) return BSTR_OK; - - i = pos; - j = 0; - - d0 = b2->data; - d1 = b1->data; - ll = b2->slen; - - for (;;) { - if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { - j ++; - if (j >= ll) return i; - } else { - i ++; - if (i >= l) break; - j=0; - } - } - - return BSTR_ERR; -} - -/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * backward but without regard to case. If it is found then return with the - * first position where it is found, otherwise return BSTR_ERR. Note that - * this is just a brute force string searcher that does not attempt clever - * things like the Boyer-Moore search algorithm. Because of this there are - * many degenerate cases where this can take much longer than it needs to. - */ -int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) { -int j, i, l; -unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos && b2->slen == 0) return pos; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* Obvious alias case */ - if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK; - - i = pos; - if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; - - /* If no space to find such a string then snap back */ - if (l + 1 <= i) i = l; - j = 0; - - d0 = b2->data; - d1 = b1->data; - l = b2->slen; - - for (;;) { - if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { - j ++; - if (j >= l) return i; - } else { - i --; - if (i < 0) break; - j=0; - } - } - - return BSTR_ERR; -} - - -/* int bstrchrp (const_bstring b, int c, int pos) - * - * Search for the character c in b forwards from the position pos - * (inclusive). 
- */ -int bstrchrp (const_bstring b, int c, int pos) { -unsigned char * p; - - if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; - p = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, (b->slen - pos)); - if (p) return (int) (p - b->data); - return BSTR_ERR; -} - -/* int bstrrchrp (const_bstring b, int c, int pos) - * - * Search for the character c in b backwards from the position pos in string - * (inclusive). - */ -int bstrrchrp (const_bstring b, int c, int pos) { -int i; - - if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; - for (i=pos; i >= 0; i--) { - if (b->data[i] == (unsigned char) c) return i; - } - return BSTR_ERR; -} - -#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) -#define LONG_LOG_BITS_QTY (3) -#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) -#define LONG_TYPE unsigned char - -#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) -struct charField { LONG_TYPE content[CFCLEN]; }; -#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1)))) -#define setInCharField(cf,idx) { \ - unsigned int c = (unsigned int) (idx); \ - (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \ -} - -#else - -#define CFCLEN (1 << CHAR_BIT) -struct charField { unsigned char content[CFCLEN]; }; -#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)]) -#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0 - -#endif - -/* Convert a bstring to charField */ -static int buildCharField (struct charField * cf, const_bstring b) { -int i; - if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR; - memset ((void *) cf->content, 0, sizeof (struct charField)); - for (i=0; i < b->slen; i++) { - setInCharField (cf, b->data[i]); - } - return BSTR_OK; -} - -static void invertCharField (struct charField * cf) { -int i; - for (i=0; i < CFCLEN; i++) cf->content[i] = ~cf->content[i]; 
-} - -/* Inner engine for binchr */ -static int binchrCF (const unsigned char * data, int len, int pos, const struct charField * cf) { -int i; - for (i=pos; i < len; i++) { - unsigned char c = (unsigned char) data[i]; - if (testInCharField (cf, c)) return i; - } - return BSTR_ERR; -} - -/* int binchr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the first position in b0 starting from pos or after, in which - * one of the characters in b1 is found and return it. If such a position - * does not exist in b0, then BSTR_ERR is returned. - */ -int binchr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen <= pos) return BSTR_ERR; - if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos); - if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; - return binchrCF (b0->data, b0->slen, pos, &chrs); -} - -/* Inner engine for binchrr */ -static int binchrrCF (const unsigned char * data, int pos, const struct charField * cf) { -int i; - for (i=pos; i >= 0; i--) { - unsigned int c = (unsigned int) data[i]; - if (testInCharField (cf, c)) return i; - } - return BSTR_ERR; -} - -/* int binchrr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the last position in b0 no greater than pos, in which one of - * the characters in b1 is found and return it. If such a position does not - * exist in b0, then BSTR_ERR is returned. 
- */ -int binchrr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL || - b0->slen < pos) return BSTR_ERR; - if (pos == b0->slen) pos--; - if (1 == b1->slen) return bstrrchrp (b0, b1->data[0], pos); - if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; - return binchrrCF (b0->data, pos, &chrs); -} - -/* int bninchr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the first position in b0 starting from pos or after, in which - * none of the characters in b1 is found and return it. If such a position - * does not exist in b0, then BSTR_ERR is returned. - */ -int bninchr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen <= pos) return BSTR_ERR; - if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; - invertCharField (&chrs); - return binchrCF (b0->data, b0->slen, pos, &chrs); -} - -/* int bninchrr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the last position in b0 no greater than pos, in which none of - * the characters in b1 is found and return it. If such a position does not - * exist in b0, then BSTR_ERR is returned. - */ -int bninchrr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen < pos) return BSTR_ERR; - if (pos == b0->slen) pos--; - if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; - invertCharField (&chrs); - return binchrrCF (b0->data, pos, &chrs); -} - -/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill) - * - * Overwrite the string b0 starting at position pos with the string b1. If - * the position pos is past the end of b0, then the character "fill" is - * appended as necessary to make up the gap between the end of b0 and pos. - * If b1 is NULL, it behaves as if it were a 0-length string. 
- */ -int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) { -int d, newlen; -ptrdiff_t pd; -bstring aux = (bstring) b1; - - if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || - b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR; - if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR; - - d = pos; - - /* Aliasing case */ - if (NULL != aux) { - if ((pd = (ptrdiff_t) (b1->data - b0->data)) >= 0 && pd < (ptrdiff_t) b0->mlen) { - if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; - } - d += aux->slen; - } - - /* Increase memory size if necessary */ - if (balloc (b0, d + 1) != BSTR_OK) { - if (aux != b1) bdestroy (aux); - return BSTR_ERR; - } - - newlen = b0->slen; - - /* Fill in "fill" character as necessary */ - if (pos > newlen) { - bstr__memset (b0->data + b0->slen, (int) fill, (size_t) (pos - b0->slen)); - newlen = pos; - } - - /* Copy b1 to position pos in b0. */ - if (aux != NULL) { - bBlockCopy ((char *) (b0->data + pos), (char *) aux->data, aux->slen); - if (aux != b1) bdestroy (aux); - } - - /* Indicate the potentially increased size of b0 */ - if (d > newlen) newlen = d; - - b0->slen = newlen; - b0->data[newlen] = (unsigned char) '\0'; - - return BSTR_OK; -} - -/* int binsert (bstring b1, int pos, bstring b2, unsigned char fill) - * - * Inserts the string b2 into b1 at position pos. If the position pos is - * past the end of b1, then the character "fill" is appended as necessary to - * make up the gap between the end of b1 and pos. Unlike bsetstr, binsert - * does not allow b2 to be NULL. 
- */ -int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) { -int d, l; -ptrdiff_t pd; -bstring aux = (bstring) b2; - - if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 || - b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR; - - /* Aliasing case */ - if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->mlen) { - if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; - } - - /* Compute the two possible end pointers */ - d = b1->slen + aux->slen; - l = pos + aux->slen; - if ((d|l) < 0) return BSTR_ERR; - - if (l > d) { - /* Inserting past the end of the string */ - if (balloc (b1, l + 1) != BSTR_OK) { - if (aux != b2) bdestroy (aux); - return BSTR_ERR; - } - bstr__memset (b1->data + b1->slen, (int) fill, (size_t) (pos - b1->slen)); - b1->slen = l; - } else { - /* Inserting in the middle of the string */ - if (balloc (b1, d + 1) != BSTR_OK) { - if (aux != b2) bdestroy (aux); - return BSTR_ERR; - } - bBlockCopy (b1->data + l, b1->data + pos, d - l); - b1->slen = d; - } - bBlockCopy (b1->data + pos, aux->data, aux->slen); - b1->data[b1->slen] = (unsigned char) '\0'; - if (aux != b2) bdestroy (aux); - return BSTR_OK; -} - -/* int breplace (bstring b1, int pos, int len, bstring b2, - * unsigned char fill) - * - * Replace a section of a string from pos for a length len with the string b2. - * fill is used is pos > b1->slen. - */ -int breplace (bstring b1, int pos, int len, const_bstring b2, - unsigned char fill) { -int pl, ret; -ptrdiff_t pd; -bstring aux = (bstring) b2; - - if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL || - b2 == NULL || b1->data == NULL || b2->data == NULL || - b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen || - b1->mlen <= 0) return BSTR_ERR; - - /* Straddles the end? 
*/ - if (pl >= b1->slen) { - if ((ret = bsetstr (b1, pos, b2, fill)) < 0) return ret; - if (pos + b2->slen < b1->slen) { - b1->slen = pos + b2->slen; - b1->data[b1->slen] = (unsigned char) '\0'; - } - return ret; - } - - /* Aliasing case */ - if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->slen) { - if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; - } - - if (aux->slen > len) { - if (balloc (b1, b1->slen + aux->slen - len) != BSTR_OK) { - if (aux != b2) bdestroy (aux); - return BSTR_ERR; - } - } - - if (aux->slen != len) bstr__memmove (b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len)); - bstr__memcpy (b1->data + pos, aux->data, aux->slen); - b1->slen += aux->slen - len; - b1->data[b1->slen] = (unsigned char) '\0'; - if (aux != b2) bdestroy (aux); - return BSTR_OK; -} - -/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, - * int pos) - * - * Replace all occurrences of a find string with a replace string after a - * given point in a bstring. 
- */ - -typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2); - -static int findreplaceengine (bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) { -int i, ret, slen, mlen, delta, acc; -int * d; -int static_d[32]; -ptrdiff_t pd; -bstring auxf = (bstring) find; -bstring auxr = (bstring) repl; - - if (b == NULL || b->data == NULL || find == NULL || - find->data == NULL || repl == NULL || repl->data == NULL || - pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen || - b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR; - if (pos > b->slen - find->slen) return BSTR_OK; - - /* Alias with find string */ - pd = (ptrdiff_t) (find->data - b->data); - if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) { - if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR; - } - - /* Alias with repl string */ - pd = (ptrdiff_t) (repl->data - b->data); - if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) { - if (NULL == (auxr = bstrcpy (repl))) { - if (auxf != find) bdestroy (auxf); - return BSTR_ERR; - } - } - - delta = auxf->slen - auxr->slen; - - /* in-place replacement since find and replace strings are of equal - length */ - if (delta == 0) { - while ((pos = instr (b, pos, auxf)) >= 0) { - bstr__memcpy (b->data + pos, auxr->data, auxr->slen); - pos += auxf->slen; - } - if (auxf != find) bdestroy (auxf); - if (auxr != repl) bdestroy (auxr); - return BSTR_OK; - } - - /* shrinking replacement since auxf->slen > auxr->slen */ - if (delta > 0) { - acc = 0; - - while ((i = instr (b, pos, auxf)) >= 0) { - if (acc && i > pos) - bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); - if (auxr->slen) - bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen); - acc += delta; - pos = i + auxf->slen; - } - - if (acc) { - i = b->slen; - if (i > pos) - bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); - b->slen -= acc; - b->data[b->slen] = (unsigned char) '\0'; - } - - if 
(auxf != find) bdestroy (auxf); - if (auxr != repl) bdestroy (auxr); - return BSTR_OK; - } - - /* expanding replacement since find->slen < repl->slen. Its a lot - more complicated. */ - - mlen = 32; - d = (int *) static_d; /* Avoid malloc for trivial cases */ - acc = slen = 0; - - while ((pos = instr (b, pos, auxf)) >= 0) { - if (slen + 1 >= mlen) { - int sl; - int * t; - mlen += mlen; - sl = sizeof (int *) * mlen; - if (static_d == d) d = NULL; - if (sl < mlen || NULL == (t = (int *) bstr__realloc (d, sl))) { - ret = BSTR_ERR; - goto done; - } - if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d)); - d = t; - } - d[slen] = pos; - slen++; - acc -= delta; - pos += auxf->slen; - if (pos < 0 || acc < 0) { - ret = BSTR_ERR; - goto done; - } - } - d[slen] = b->slen; - - if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) { - b->slen += acc; - for (i = slen-1; i >= 0; i--) { - int s, l; - s = d[i] + auxf->slen; - l = d[i+1] - s; - if (l) { - bstr__memmove (b->data + s + acc, b->data + s, l); - } - if (auxr->slen) { - bstr__memmove (b->data + s + acc - auxr->slen, - auxr->data, auxr->slen); - } - acc += delta; - } - b->data[b->slen] = (unsigned char) '\0'; - } - - done:; - if (static_d == d) d = NULL; - bstr__free (d); - if (auxf != find) bdestroy (auxf); - if (auxr != repl) bdestroy (auxr); - return ret; -} - -/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, - * int pos) - * - * Replace all occurrences of a find string with a replace string after a - * given point in a bstring. - */ -int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos) { - return findreplaceengine (b, find, repl, pos, binstr); -} - -/* int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, - * int pos) - * - * Replace all occurrences of a find string, ignoring case, with a replace - * string after a given point in a bstring. 
- */ -int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos) { - return findreplaceengine (b, find, repl, pos, binstrcaseless); -} - -/* int binsertch (bstring b, int pos, int len, unsigned char fill) - * - * Inserts the character fill repeatedly into b at position pos for a - * length len. If the position pos is past the end of b, then the - * character "fill" is appended as necessary to make up the gap between the - * end of b and the position pos + len. - */ -int binsertch (bstring b, int pos, int len, unsigned char fill) { -int d, l, i; - - if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || len < 0) return BSTR_ERR; - - /* Compute the two possible end pointers */ - d = b->slen + len; - l = pos + len; - if ((d|l) < 0) return BSTR_ERR; - - if (l > d) { - /* Inserting past the end of the string */ - if (balloc (b, l + 1) != BSTR_OK) return BSTR_ERR; - pos = b->slen; - b->slen = l; - } else { - /* Inserting in the middle of the string */ - if (balloc (b, d + 1) != BSTR_OK) return BSTR_ERR; - for (i = d - 1; i >= l; i--) { - b->data[i] = b->data[i - len]; - } - b->slen = d; - } - - for (i=pos; i < l; i++) b->data[i] = fill; - b->data[b->slen] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* int bpattern (bstring b, int len) - * - * Replicate the bstring, b in place, end to end repeatedly until it - * surpasses len characters, then chop the result to exactly len characters. - * This function operates in-place. The function will return with BSTR_ERR - * if b is NULL or of length 0, otherwise BSTR_OK is returned. 
- */ -int bpattern (bstring b, int len) { -int i, d; - - d = blength (b); - if (d <= 0 || len < 0 || balloc (b, len + 1) != BSTR_OK) return BSTR_ERR; - if (len > 0) { - if (d == 1) return bsetstr (b, len, NULL, b->data[0]); - for (i = d; i < len; i++) b->data[i] = b->data[i - d]; - } - b->data[len] = (unsigned char) '\0'; - b->slen = len; - return BSTR_OK; -} - -#define BS_BUFF_SZ (1024) - -/* int breada (bstring b, bNread readPtr, void * parm) - * - * Use a finite buffer fread-like function readPtr to concatenate to the - * bstring b the entire contents of file-like source data in a roughly - * efficient way. - */ -int breada (bstring b, bNread readPtr, void * parm) { -int i, l, n; - - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || readPtr == NULL) return BSTR_ERR; - - i = b->slen; - for (n=i+16; ; n += ((n < BS_BUFF_SZ) ? n : BS_BUFF_SZ)) { - if (BSTR_OK != balloc (b, n + 1)) return BSTR_ERR; - l = (int) readPtr ((void *) (b->data + i), 1, n - i, parm); - i += l; - b->slen = i; - if (i < n) break; - } - - b->data[i] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* bstring bread (bNread readPtr, void * parm) - * - * Use a finite buffer fread-like function readPtr to create a bstring - * filled with the entire contents of file-like source data in a roughly - * efficient way. - */ -bstring bread (bNread readPtr, void * parm) { -bstring buff; - - if (0 > breada (buff = bfromcstr (""), readPtr, parm)) { - bdestroy (buff); - return NULL; - } - return buff; -} - -/* int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) - * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated to the end of the - * bstring b. The stream read is terminated by the passed in terminator - * parameter. 
- * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * function returns with a partial result in b. If there is an empty partial - * result, 1 is returned. If no characters are read, or there is some other - * detectable error, BSTR_ERR is returned. - */ -int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) { -int c, d, e; - - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; - d = 0; - e = b->mlen - 2; - - while ((c = getcPtr (parm)) >= 0) { - if (d > e) { - b->slen = d; - if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; - e = b->mlen - 2; - } - b->data[d] = (unsigned char) c; - d++; - if (c == terminator) break; - } - - b->data[d] = (unsigned char) '\0'; - b->slen = d; - - return d == 0 && c < 0; -} - -/* int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) - * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated to the end of the - * bstring b. The stream read is terminated by the passed in terminator - * parameter. - * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * function returns with a partial result concatentated to b. If there is - * an empty partial result, 1 is returned. If no characters are read, or - * there is some other detectable error, BSTR_ERR is returned. 
- */ -int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) { -int c, d, e; - - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; - d = b->slen; - e = b->mlen - 2; - - while ((c = getcPtr (parm)) >= 0) { - if (d > e) { - b->slen = d; - if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; - e = b->mlen - 2; - } - b->data[d] = (unsigned char) c; - d++; - if (c == terminator) break; - } - - b->data[d] = (unsigned char) '\0'; - b->slen = d; - - return d == 0 && c < 0; -} - -/* bstring bgets (bNgetc getcPtr, void * parm, char terminator) - * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated into a bstring. - * The stream read is terminated by the passed in terminator function. - * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * result obtained thus far is returned. If no characters are read, or - * there is some other detectable error, NULL is returned. - */ -bstring bgets (bNgetc getcPtr, void * parm, char terminator) { -bstring buff; - - if (0 > bgetsa (buff = bfromcstr (""), getcPtr, parm, terminator) || 0 >= buff->slen) { - bdestroy (buff); - buff = NULL; - } - return buff; -} - -struct bStream { - bstring buff; /* Buffer for over-reads */ - void * parm; /* The stream handle for core stream */ - bNread readFnPtr; /* fread compatible fnptr for core stream */ - int isEOF; /* track file's EOF state */ - int maxBuffSz; -}; - -/* struct bStream * bsopen (bNread readPtr, void * parm) - * - * Wrap a given open stream (described by a fread compatible function - * pointer and stream handle) into an open bStream suitable for the bstring - * library streaming functions. 
- */ -struct bStream * bsopen (bNread readPtr, void * parm) { -struct bStream * s; - - if (readPtr == NULL) return NULL; - s = (struct bStream *) bstr__alloc (sizeof (struct bStream)); - if (s == NULL) return NULL; - s->parm = parm; - s->buff = bfromcstr (""); - s->readFnPtr = readPtr; - s->maxBuffSz = BS_BUFF_SZ; - s->isEOF = 0; - return s; -} - -/* int bsbufflength (struct bStream * s, int sz) - * - * Set the length of the buffer used by the bStream. If sz is zero, the - * length is not set. This function returns with the previous length. - */ -int bsbufflength (struct bStream * s, int sz) { -int oldSz; - if (s == NULL || sz < 0) return BSTR_ERR; - oldSz = s->maxBuffSz; - if (sz > 0) s->maxBuffSz = sz; - return oldSz; -} - -int bseof (const struct bStream * s) { - if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR; - return s->isEOF && (s->buff->slen == 0); -} - -/* void * bsclose (struct bStream * s) - * - * Close the bStream, and return the handle to the stream that was originally - * used to open the given stream. - */ -void * bsclose (struct bStream * s) { -void * parm; - if (s == NULL) return NULL; - s->readFnPtr = NULL; - if (s->buff) bdestroy (s->buff); - s->buff = NULL; - parm = s->parm; - s->parm = NULL; - s->isEOF = 1; - bstr__free (s); - return parm; -} - -/* int bsreadlna (bstring r, struct bStream * s, char terminator) - * - * Read a bstring terminated by the terminator character or the end of the - * stream from the bStream (s) and return it into the parameter r. This - * function may read additional characters from the core stream that are not - * returned, but will be retained for subsequent read operations. 
- */ -int bsreadlna (bstring r, struct bStream * s, char terminator) { -int i, l, ret, rlo; -char * b; -struct tagbstring x; - - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 || - r->slen < 0 || r->mlen < r->slen) return BSTR_ERR; - l = s->buff->slen; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *) s->buff->data; - x.data = (unsigned char *) b; - - /* First check if the current buffer holds the terminator */ - b[l] = terminator; /* Set sentinel */ - for (i=0; b[i] != terminator; i++) ; - if (i < l) { - x.slen = i + 1; - ret = bconcat (r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); - return BSTR_OK; - } - - rlo = r->slen; - - /* If not then just concatenate the entire buffer to the output */ - x.slen = l; - if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; - - /* Perform direct in-place reads into the destination to allow for - the minimum of data-copies */ - for (;;) { - if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *) (r->data + r->slen); - l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); - if (l <= 0) { - r->data[r->slen] = (unsigned char) '\0'; - s->buff->slen = 0; - s->isEOF = 1; - /* If nothing was read return with an error message */ - return BSTR_ERR & -(r->slen == rlo); - } - b[l] = terminator; /* Set sentinel */ - for (i=0; b[i] != terminator; i++) ; - if (i < l) break; - r->slen += l; - } - - /* Terminator found, push over-read back to buffer */ - i++; - r->slen += i; - s->buff->slen = l - i; - bstr__memcpy (s->buff->data, b + i, l - i); - r->data[r->slen] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* int bsreadlnsa (bstring r, struct bStream * s, bstring term) - * - * Read a bstring terminated by any character in the term string or the end - * of the stream from the bStream (s) and return it into the parameter r. 
- * This function may read additional characters from the core stream that - * are not returned, but will be retained for subsequent read operations. - */ -int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) { -int i, l, ret, rlo; -unsigned char * b; -struct tagbstring x; -struct charField cf; - - if (s == NULL || s->buff == NULL || r == NULL || term == NULL || - term->data == NULL || r->mlen <= 0 || r->slen < 0 || - r->mlen < r->slen) return BSTR_ERR; - if (term->slen == 1) return bsreadlna (r, s, term->data[0]); - if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR; - - l = s->buff->slen; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (unsigned char *) s->buff->data; - x.data = b; - - /* First check if the current buffer holds the terminator */ - b[l] = term->data[0]; /* Set sentinel */ - for (i=0; !testInCharField (&cf, b[i]); i++) ; - if (i < l) { - x.slen = i + 1; - ret = bconcat (r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); - return BSTR_OK; - } - - rlo = r->slen; - - /* If not then just concatenate the entire buffer to the output */ - x.slen = l; - if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; - - /* Perform direct in-place reads into the destination to allow for - the minimum of data-copies */ - for (;;) { - if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; - b = (unsigned char *) (r->data + r->slen); - l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); - if (l <= 0) { - r->data[r->slen] = (unsigned char) '\0'; - s->buff->slen = 0; - s->isEOF = 1; - /* If nothing was read return with an error message */ - return BSTR_ERR & -(r->slen == rlo); - } - - b[l] = term->data[0]; /* Set sentinel */ - for (i=0; !testInCharField (&cf, b[i]); i++) ; - if (i < l) break; - r->slen += l; - } - - /* Terminator found, push over-read back to buffer */ - i++; - r->slen += i; - s->buff->slen = l - i; - bstr__memcpy (s->buff->data, b + i, l - i); - 
r->data[r->slen] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* int bsreada (bstring r, struct bStream * s, int n) - * - * Read a bstring of length n (or, if it is fewer, as many bytes as is - * remaining) from the bStream. This function may read additional - * characters from the core stream that are not returned, but will be - * retained for subsequent read operations. This function will not read - * additional characters from the core stream beyond virtual stream pointer. - */ -int bsreada (bstring r, struct bStream * s, int n) { -int l, ret, orslen; -char * b; -struct tagbstring x; - - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 - || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR; - - n += r->slen; - if (n <= 0) return BSTR_ERR; - - l = s->buff->slen; - - orslen = r->slen; - - if (0 == l) { - if (s->isEOF) return BSTR_ERR; - if (r->mlen > n) { - l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, s->parm); - if (0 >= l || l > n - r->slen) { - s->isEOF = 1; - return BSTR_ERR; - } - r->slen += l; - r->data[r->slen] = (unsigned char) '\0'; - return 0; - } - } - - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *) s->buff->data; - x.data = (unsigned char *) b; - - do { - if (l + r->slen >= n) { - x.slen = n - r->slen; - ret = bconcat (r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen); - return BSTR_ERR & -(r->slen == orslen); - } - - x.slen = l; - if (BSTR_OK != bconcat (r, &x)) break; - - l = n - r->slen; - if (l > s->maxBuffSz) l = s->maxBuffSz; - - l = (int) s->readFnPtr (b, 1, l, s->parm); - - } while (l > 0); - if (l < 0) l = 0; - if (l == 0) s->isEOF = 1; - s->buff->slen = l; - return BSTR_ERR & -(r->slen == orslen); -} - -/* int bsreadln (bstring r, struct bStream * s, char terminator) - * - * Read a bstring terminated by the terminator character or the end of the - * stream from the bStream (s) and return it into the parameter r. 
This - * function may read additional characters from the core stream that are not - * returned, but will be retained for subsequent read operations. - */ -int bsreadln (bstring r, struct bStream * s, char terminator) { - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0) - return BSTR_ERR; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreadlna (r, s, terminator); -} - -/* int bsreadlns (bstring r, struct bStream * s, bstring term) - * - * Read a bstring terminated by any character in the term string or the end - * of the stream from the bStream (s) and return it into the parameter r. - * This function may read additional characters from the core stream that - * are not returned, but will be retained for subsequent read operations. - */ -int bsreadlns (bstring r, struct bStream * s, const_bstring term) { - if (s == NULL || s->buff == NULL || r == NULL || term == NULL - || term->data == NULL || r->mlen <= 0) return BSTR_ERR; - if (term->slen == 1) return bsreadln (r, s, term->data[0]); - if (term->slen < 1) return BSTR_ERR; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreadlnsa (r, s, term); -} - -/* int bsread (bstring r, struct bStream * s, int n) - * - * Read a bstring of length n (or, if it is fewer, as many bytes as is - * remaining) from the bStream. This function may read additional - * characters from the core stream that are not returned, but will be - * retained for subsequent read operations. This function will not read - * additional characters from the core stream beyond virtual stream pointer. 
- */ -int bsread (bstring r, struct bStream * s, int n) { - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 - || n <= 0) return BSTR_ERR; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreada (r, s, n); -} - -/* int bsunread (struct bStream * s, const_bstring b) - * - * Insert a bstring into the bStream at the current position. These - * characters will be read prior to those that actually come from the core - * stream. - */ -int bsunread (struct bStream * s, const_bstring b) { - if (s == NULL || s->buff == NULL) return BSTR_ERR; - return binsert (s->buff, 0, b, (unsigned char) '?'); -} - -/* int bspeek (bstring r, const struct bStream * s) - * - * Return the currently buffered characters from the bStream that will be - * read prior to reads from the core stream. - */ -int bspeek (bstring r, const struct bStream * s) { - if (s == NULL || s->buff == NULL) return BSTR_ERR; - return bassign (r, s->buff); -} - -/* bstring bjoin (const struct bstrList * bl, const_bstring sep); - * - * Join the entries of a bstrList into one bstring by sequentially - * concatenating them with the sep string in between. If there is an error - * NULL is returned, otherwise a bstring with the correct result is returned. - */ -bstring bjoin (const struct bstrList * bl, const_bstring sep) { -bstring b; -int i, c, v; - - if (bl == NULL || bl->qty < 0) return NULL; - if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL; - - for (i = 0, c = 1; i < bl->qty; i++) { - v = bl->entry[i]->slen; - if (v < 0) return NULL; /* Invalid input */ - c += v; - if (c < 0) return NULL; /* Wrap around ?? 
*/ - } - - if (sep != NULL) c += (bl->qty - 1) * sep->slen; - - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (NULL == b) return NULL; /* Out of memory */ - b->data = (unsigned char *) bstr__alloc (c); - if (b->data == NULL) { - bstr__free (b); - return NULL; - } - - b->mlen = c; - b->slen = c-1; - - for (i = 0, c = 0; i < bl->qty; i++) { - if (i > 0 && sep != NULL) { - bstr__memcpy (b->data + c, sep->data, sep->slen); - c += sep->slen; - } - v = bl->entry[i]->slen; - bstr__memcpy (b->data + c, bl->entry[i]->data, v); - c += v; - } - b->data[c] = (unsigned char) '\0'; - return b; -} - -#define BSSSC_BUFF_LEN (256) - -/* int bssplitscb (struct bStream * s, const_bstring splitStr, - * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) - * - * Iterate the set of disjoint sequential substrings read from a stream - * divided by any of the characters in splitStr. An empty splitStr causes - * the whole stream to be iterated once. - * - * Note: At the point of calling the cb function, the bStream pointer is - * pointed exactly at the position right after having read the split - * character. The cb function can act on the stream by causing the bStream - * pointer to move, and bssplitscb will continue by starting the next split - * at the position of the pointer after the return from cb. - * - * However, if the cb causes the bStream s to be destroyed then the cb must - * return with a negative value, otherwise bssplitscb will continue in an - * undefined manner. 
- */ -int bssplitscb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { -struct charField chrs; -bstring buff; -int i, p, ret; - - if (cb == NULL || s == NULL || s->readFnPtr == NULL - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; - - if (splitStr->slen == 0) { - while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ; - if ((ret = cb (parm, 0, buff)) > 0) - ret = 0; - } else { - buildCharField (&chrs, splitStr); - ret = p = i = 0; - for (;;) { - if (i >= buff->slen) { - bsreada (buff, s, BSSSC_BUFF_LEN); - if (i >= buff->slen) { - if (0 < (ret = cb (parm, p, buff))) ret = 0; - break; - } - } - if (testInCharField (&chrs, buff->data[i])) { - struct tagbstring t; - unsigned char c; - - blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1)); - if ((ret = bsunread (s, &t)) < 0) break; - buff->slen = i; - c = buff->data[i]; - buff->data[i] = (unsigned char) '\0'; - if ((ret = cb (parm, p, buff)) < 0) break; - buff->data[i] = c; - buff->slen = 0; - p += i + 1; - i = -1; - } - i++; - } - } - - bdestroy (buff); - return ret; -} - -/* int bssplitstrcb (struct bStream * s, const_bstring splitStr, - * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) - * - * Iterate the set of disjoint sequential substrings read from a stream - * divided by the entire substring splitStr. An empty splitStr causes - * each character of the stream to be iterated. - * - * Note: At the point of calling the cb function, the bStream pointer is - * pointed exactly at the position right after having read the split - * character. The cb function can act on the stream by causing the bStream - * pointer to move, and bssplitscb will continue by starting the next split - * at the position of the pointer after the return from cb. 
- * - * However, if the cb causes the bStream s to be destroyed then the cb must - * return with a negative value, otherwise bssplitscb will continue in an - * undefined manner. - */ -int bssplitstrcb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { -bstring buff; -int i, p, ret; - - if (cb == NULL || s == NULL || s->readFnPtr == NULL - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm); - - if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; - - if (splitStr->slen == 0) { - for (i=0; bsreada (buff, s, BSSSC_BUFF_LEN) >= 0; i++) { - if ((ret = cb (parm, 0, buff)) < 0) { - bdestroy (buff); - return ret; - } - buff->slen = 0; - } - return BSTR_OK; - } else { - ret = p = i = 0; - for (i=p=0;;) { - if ((ret = binstr (buff, 0, splitStr)) >= 0) { - struct tagbstring t; - blk2tbstr (t, buff->data, ret); - i = ret + splitStr->slen; - if ((ret = cb (parm, p, &t)) < 0) break; - p += i; - bdelete (buff, 0, i); - } else { - bsreada (buff, s, BSSSC_BUFF_LEN); - if (bseof (s)) { - if ((ret = cb (parm, p, buff)) > 0) ret = 0; - break; - } - } - } - } - - bdestroy (buff); - return ret; -} - -/* int bstrListCreate (void) - * - * Create a bstrList. - */ -struct bstrList * bstrListCreate (void) { -struct bstrList * sl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (sl) { - sl->entry = (bstring *) bstr__alloc (1*sizeof (bstring)); - if (!sl->entry) { - bstr__free (sl); - sl = NULL; - } else { - sl->qty = 0; - sl->mlen = 1; - } - } - return sl; -} - -/* int bstrListDestroy (struct bstrList * sl) - * - * Destroy a bstrList that has been created by bsplit, bsplits or bstrListCreate. 
- */ -int bstrListDestroy (struct bstrList * sl) { -int i; - if (sl == NULL || sl->qty < 0) return BSTR_ERR; - for (i=0; i < sl->qty; i++) { - if (sl->entry[i]) { - bdestroy (sl->entry[i]); - sl->entry[i] = NULL; - } - } - sl->qty = -1; - sl->mlen = -1; - bstr__free (sl->entry); - sl->entry = NULL; - bstr__free (sl); - return BSTR_OK; -} - -/* int bstrListAlloc (struct bstrList * sl, int msz) - * - * Ensure that there is memory for at least msz number of entries for the - * list. - */ -int bstrListAlloc (struct bstrList * sl, int msz) { -bstring * l; -int smsz; -size_t nsz; - if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; - if (sl->mlen >= msz) return BSTR_OK; - smsz = snapUpSize (msz); - nsz = ((size_t) smsz) * sizeof (bstring); - if (nsz < (size_t) smsz) return BSTR_ERR; - l = (bstring *) bstr__realloc (sl->entry, nsz); - if (!l) { - smsz = msz; - nsz = ((size_t) smsz) * sizeof (bstring); - l = (bstring *) bstr__realloc (sl->entry, nsz); - if (!l) return BSTR_ERR; - } - sl->mlen = smsz; - sl->entry = l; - return BSTR_OK; -} - -/* int bstrListAllocMin (struct bstrList * sl, int msz) - * - * Try to allocate the minimum amount of memory for the list to include at - * least msz entries or sl->qty whichever is greater. 
- */ -int bstrListAllocMin (struct bstrList * sl, int msz) { -bstring * l; -size_t nsz; - if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; - if (msz < sl->qty) msz = sl->qty; - if (sl->mlen == msz) return BSTR_OK; - nsz = ((size_t) msz) * sizeof (bstring); - if (nsz < (size_t) msz) return BSTR_ERR; - l = (bstring *) bstr__realloc (sl->entry, nsz); - if (!l) return BSTR_ERR; - sl->mlen = msz; - sl->entry = l; - return BSTR_OK; -} - -/* int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) - * - * Iterate the set of disjoint sequential substrings over str divided by the - * character in splitChar. - * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitcb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitcb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, - * otherwise bsplitcb will continue in an undefined manner. 
- */ -int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm) { -int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) - return BSTR_ERR; - - p = pos; - do { - for (i=p; i < str->slen; i++) { - if (str->data[i] == splitChar) break; - } - if ((ret = cb (parm, p, i - p)) < 0) return ret; - p = i + 1; - } while (p <= str->slen); - return BSTR_OK; -} - -/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) - * - * Iterate the set of disjoint sequential substrings over str divided by any - * of the characters in splitStr. An empty splitStr causes the whole str to - * be iterated once. - * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitscb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitscb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, - * otherwise bsplitscb will continue in an undefined manner. 
- */ -int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm) { -struct charField chrs; -int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - if (splitStr->slen == 0) { - if ((ret = cb (parm, 0, str->slen)) > 0) ret = 0; - return ret; - } - - if (splitStr->slen == 1) - return bsplitcb (str, splitStr->data[0], pos, cb, parm); - - buildCharField (&chrs, splitStr); - - p = pos; - do { - for (i=p; i < str->slen; i++) { - if (testInCharField (&chrs, str->data[i])) break; - } - if ((ret = cb (parm, p, i - p)) < 0) return ret; - p = i + 1; - } while (p <= str->slen); - return BSTR_OK; -} - -/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) - * - * Iterate the set of disjoint sequential substrings over str divided by the - * substring splitStr. An empty splitStr causes the whole str to be - * iterated once. - * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitstrcb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitscb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, - * otherwise bsplitscb will continue in an undefined manner. 
- */ -int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm) { -int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (0 == splitStr->slen) { - for (i=pos; i < str->slen; i++) { - if ((ret = cb (parm, i, 1)) < 0) return ret; - } - return BSTR_OK; - } - - if (splitStr->slen == 1) - return bsplitcb (str, splitStr->data[0], pos, cb, parm); - - for (i=p=pos; i <= str->slen - splitStr->slen; i++) { - if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) { - if ((ret = cb (parm, p, i - p)) < 0) return ret; - i += splitStr->slen; - p = i; - } - } - if ((ret = cb (parm, p, str->slen - p)) < 0) return ret; - return BSTR_OK; -} - -struct genBstrList { - bstring b; - struct bstrList * bl; -}; - -static int bscb (void * parm, int ofs, int len) { -struct genBstrList * g = (struct genBstrList *) parm; - if (g->bl->qty >= g->bl->mlen) { - int mlen = g->bl->mlen * 2; - bstring * tbl; - - while (g->bl->qty >= mlen) { - if (mlen < g->bl->mlen) return BSTR_ERR; - mlen += mlen; - } - - tbl = (bstring *) bstr__realloc (g->bl->entry, sizeof (bstring) * mlen); - if (tbl == NULL) return BSTR_ERR; - - g->bl->entry = tbl; - g->bl->mlen = mlen; - } - - g->bl->entry[g->bl->qty] = bmidstr (g->b, ofs, len); - g->bl->qty++; - return BSTR_OK; -} - -/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar) - * - * Create an array of sequential substrings from str divided by the character - * splitChar. 
- */ -struct bstrList * bsplit (const_bstring str, unsigned char splitChar) { -struct genBstrList g; - - if (str == NULL || str->data == NULL || str->slen < 0) return NULL; - - g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); - if (NULL == g.bl->entry) { - bstr__free (g.bl); - return NULL; - } - - g.b = (bstring) str; - g.bl->qty = 0; - if (bsplitcb (str, splitChar, 0, bscb, &g) < 0) { - bstrListDestroy (g.bl); - return NULL; - } - return g.bl; -} - -/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) - * - * Create an array of sequential substrings from str divided by the entire - * substring splitStr. - */ -struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) { -struct genBstrList g; - - if (str == NULL || str->data == NULL || str->slen < 0) return NULL; - - g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); - if (NULL == g.bl->entry) { - bstr__free (g.bl); - return NULL; - } - - g.b = (bstring) str; - g.bl->qty = 0; - if (bsplitstrcb (str, splitStr, 0, bscb, &g) < 0) { - bstrListDestroy (g.bl); - return NULL; - } - return g.bl; -} - -/* struct bstrList * bsplits (const_bstring str, bstring splitStr) - * - * Create an array of sequential substrings from str divided by any of the - * characters in splitStr. An empty splitStr causes a single entry bstrList - * containing a copy of str to be returned. 
- */ -struct bstrList * bsplits (const_bstring str, const_bstring splitStr) { -struct genBstrList g; - - if ( str == NULL || str->slen < 0 || str->data == NULL || - splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) - return NULL; - - g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); - if (NULL == g.bl->entry) { - bstr__free (g.bl); - return NULL; - } - g.b = (bstring) str; - g.bl->qty = 0; - - if (bsplitscb (str, splitStr, 0, bscb, &g) < 0) { - bstrListDestroy (g.bl); - return NULL; - } - return g.bl; -} - -#if defined (__TURBOC__) && !defined (__BORLANDC__) -# ifndef BSTRLIB_NOVSNP -# define BSTRLIB_NOVSNP -# endif -#endif - -/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */ -#if defined(__WATCOMC__) || defined(_MSC_VER) -#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);} -#else -#ifdef BSTRLIB_NOVSNP -/* This is just a hack. If you are using a system without a vsnprintf, it is - not recommended that bformat be used at all. */ -#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;} -#define START_VSNBUFF (256) -#else - -#ifdef __GNUC__ -/* Something is making gcc complain about this prototype not being here, so - I've just gone ahead and put it in. */ -extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg); -#endif - -#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);} -#endif -#endif - -#if !defined (BSTRLIB_NOVSNP) - -#ifndef START_VSNBUFF -#define START_VSNBUFF (16) -#endif - -/* On IRIX vsnprintf returns n-1 when the operation would overflow the target - buffer, WATCOM and MSVC both return -1, while C99 requires that the - returned value be exactly what the length would be if the buffer would be - large enough. 
This leads to the idea that if the return value is larger - than n, then changing n to the return value will reduce the number of - iterations required. */ - -/* int bformata (bstring b, const char * fmt, ...) - * - * After the first parameter, it takes the same parameters as printf (), but - * rather than outputting results to stdio, it appends the results to - * a bstring which contains what would have been output. Note that if there - * is an early generation of a '\0' character, the bstring will be truncated - * to this end point. - */ -int bformata (bstring b, const char * fmt, ...) { -va_list arglist; -bstring buff; -int n, r; - - if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 - || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. */ - - if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) { - n = 1; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; - } - - for (;;) { - va_start (arglist, fmt); - exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); - va_end (arglist); - - buff->data[n] = (unsigned char) '\0'; - buff->slen = (int) (strlen) ((char *) buff->data); - - if (buff->slen < n) break; - - if (r > n) n = r; else n += n; - - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); - return BSTR_ERR; - } - } - - r = bconcat (b, buff); - bdestroy (buff); - return r; -} - -/* int bassignformat (bstring b, const char * fmt, ...) - * - * After the first parameter, it takes the same parameters as printf (), but - * rather than outputting results to stdio, it outputs the results to - * the bstring parameter b. Note that if there is an early generation of a - * '\0' character, the bstring will be truncated to this end point. 
- */ -int bassignformat (bstring b, const char * fmt, ...) { -va_list arglist; -bstring buff; -int n, r; - - if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 - || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. */ - - if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) { - n = 1; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; - } - - for (;;) { - va_start (arglist, fmt); - exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); - va_end (arglist); - - buff->data[n] = (unsigned char) '\0'; - buff->slen = (int) (strlen) ((char *) buff->data); - - if (buff->slen < n) break; - - if (r > n) n = r; else n += n; - - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); - return BSTR_ERR; - } - } - - r = bassign (b, buff); - bdestroy (buff); - return r; -} - -/* bstring bformat (const char * fmt, ...) - * - * Takes the same parameters as printf (), but rather than outputting results - * to stdio, it forms a bstring which contains what would have been output. - * Note that if there is an early generation of a '\0' character, the - * bstring will be truncated to this end point. - */ -bstring bformat (const char * fmt, ...) { -va_list arglist; -bstring buff; -int n, r; - - if (fmt == NULL) return NULL; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. 
*/ - - if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) { - n = 1; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL; - } - - for (;;) { - va_start (arglist, fmt); - exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); - va_end (arglist); - - buff->data[n] = (unsigned char) '\0'; - buff->slen = (int) (strlen) ((char *) buff->data); - - if (buff->slen < n) break; - - if (r > n) n = r; else n += n; - - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); - return NULL; - } - } - - return buff; -} - -/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist) - * - * The bvcformata function formats data under control of the format control - * string fmt and attempts to append the result to b. The fmt parameter is - * the same as that of the printf function. The variable argument list is - * replaced with arglist, which has been initialized by the va_start macro. - * The size of the output is upper bounded by count. If the required output - * exceeds count, the string b is not augmented with any contents and a value - * below BSTR_ERR is returned. If a value below -count is returned then it - * is recommended that the negative of this value be used as an update to the - * count in a subsequent pass. On other errors, such as running out of - * memory, parameter errors or numeric wrap around BSTR_ERR is returned. - * BSTR_OK is returned when the output is successfully generated and - * appended to b. - * - * Note: There is no sanity checking of arglist, and this function is - * destructive of the contents of b from the b->slen point onward. If there - * is an early generation of a '\0' character, the bstring will be truncated - * to this end point. 
- */ -int bvcformata (bstring b, int count, const char * fmt, va_list arg) { -int n, r, l; - - if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL - || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - if (count > (n = b->slen + count) + 2) return BSTR_ERR; - if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR; - - exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg); - - /* Did the operation complete successfully within bounds? */ - - if (n >= (l = b->slen + (int) (strlen) ((const char *) b->data + b->slen))) { - b->slen = l; - return BSTR_OK; - } - - /* Abort, since the buffer was not large enough. The return value - tries to help set what the retry length should be. */ - - b->data[b->slen] = '\0'; - if (r > count+1) l = r; else { - l = count+count; - if (count > l) l = INT_MAX; - } - n = -l; - if (n > BSTR_ERR-1) n = BSTR_ERR-1; - return n; -} - -#endif diff --git a/bench/src/bstrlib.c b/bench/src/bstrlib.c new file mode 120000 index 000000000..315a035e3 --- /dev/null +++ b/bench/src/bstrlib.c @@ -0,0 +1 @@ +../../src/bstrlib.c \ No newline at end of file diff --git a/bench/src/bstrlib_helper.c b/bench/src/bstrlib_helper.c new file mode 120000 index 000000000..b50d44f15 --- /dev/null +++ b/bench/src/bstrlib_helper.c @@ -0,0 +1 @@ +../../src/bstrlib_helper.c \ No newline at end of file diff --git a/bench/src/ptt2asm.c b/bench/src/ptt2asm.c new file mode 100644 index 000000000..6767e2951 --- /dev/null +++ b/bench/src/ptt2asm.c @@ -0,0 +1,767 @@ +/* + * ======================================================================================= + * Filename: ptt2asm.c + * + * Description: The interface to dynamically load ptt files + * + * Version: + * Released: + * + * Author: Thomas Gruber (tg), thomas.roehl@gmail.com + * Project: likwid + * + * Copyright (C) 2019 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public 
License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * ======================================================================================= + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + + +#include + +#ifdef __x86_64 +#include +#endif +#ifdef __i386__ +#include +#endif +#ifdef __ARM_ARCH_7A__ +#include +#endif +#ifdef __ARM_ARCH_8A +#include +#endif +#ifdef _ARCH_PPC +#include +#endif + +static int registerMapLength(RegisterMap* map) +{ + int i = 0; + while (strlen(map[i].pattern) > 0) + { + i++; + } + return i; +} + +static int registerMapMaxPattern(RegisterMap* map) +{ + int i = 0; + int max = 0; + while (strlen(map[i].pattern) > 0) + { + if (strlen(map[i].pattern) > max) + max = strlen(map[i].pattern); + i++; + } + return max; +} + +static struct bstrList* read_ptt(bstring pttfile) +{ + int ret = 0; + FILE* fp = NULL; + char buf[BUFSIZ]; + struct bstrList* l = NULL; + + if (access(bdata(pttfile), R_OK)) + { + return NULL; + } + + bstring content = bfromcstr(""); + fp = fopen(bdata(pttfile), "r"); + if (fp == NULL) { + fprintf(stderr, "fopen(%s): errno=%d\n", pttfile, errno); + return NULL; + } + for (;;) { + /* Read another chunk */ + ret = fread(buf, 1, sizeof(buf), fp); + if (ret < 0) { + fprintf(stderr, "fread(%p, 1, %lu, %p): %d, errno=%d\n", buf, sizeof(buf), fp, ret, errno); + return NULL; + } + else if (ret == 0) { + break; + } + bcatblk(content, buf, ret); + } + btrimws(content); + + l = bsplit(content, '\n'); + 
for (int i = 0; i < l->qty; i++) + { + btrimws(l->entry[i]); + } + + bdestroy(content); + return l; +} + +static int write_asm(bstring filename, struct bstrList* code) +{ + FILE* fp = NULL; + char newline = '\n'; + size_t (*ownfwrite)(const void *ptr, size_t size, size_t nmemb, FILE *stream) = &fwrite; + fp = fopen(bdata(filename), "w"); + if (fp) + { + for (int i = 0; i < code->qty; i++) + { + ownfwrite(bdata(code->entry[i]), 1, blength(code->entry[i]), fp); + ownfwrite(&newline, 1, sizeof(char), fp); + } + fclose(fp); + return 0; + } + return 1; +} + +#define ANALYSE_PTT_GET_INT(line, pattern, variable) \ + bstring tmp = bmidstr((line), blength((pattern))+1, blength((line))-blength((pattern))); \ + btrimws(tmp); \ + (variable) = ownatoi(bdata(tmp)); \ + bdestroy(tmp); \ + +static struct bstrList* analyse_ptt(bstring pttfile, TestCase** testcase) +{ + struct bstrList* ptt = NULL; + TestCase* test = NULL; + struct bstrList* code = NULL; + bstring bBYTES = bformat("BYTES"); + bstring bFLOPS = bformat("FLOPS"); + bstring bSTREAMS = bformat("STREAMS"); + bstring bTYPE = bformat("TYPE"); + bstring bTYPEDOUBLE = bformat("DOUBLE"); + bstring bTYPESINGLE = bformat("SINGLE"); + bstring bTYPEINT = bformat("INT"); + bstring bDESC = bformat("DESC"); + bstring bLOADS = bformat("LOADS"); + bstring bSTORES = bformat("STORES"); + bstring bLOADSTORES = bformat("LOADSTORES"); + bstring bINSTCONST = bformat("INSTR_CONST"); + bstring bINSTLOOP = bformat("INSTR_LOOP"); + bstring bUOPS = bformat("UOPS"); + bstring bBRANCHES = bformat("BRANCHES"); + bstring bLOOP = bformat("LOOP"); + int (*ownatoi)(const char*) = &atoi; + + ptt = read_ptt(pttfile); + + if (ptt && ptt->qty > 0) + { + test = malloc(sizeof(TestCase)); + if (test) + { + test->loads = -1; + test->stores = -1; + test->loadstores = -1; + test->branches = -1; + test->instr_const = -1; + test->instr_loop = -1; + test->uops = -1; + code = bstrListCreate(); + for (int i = 0; i < ptt->qty; i++) + { + if (bstrncmp(ptt->entry[i], 
bBYTES, blength(bBYTES)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bBYTES, test->bytes); + } + else if (bstrncmp(ptt->entry[i], bFLOPS, blength(bFLOPS)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bFLOPS, test->flops); + } + else if (bstrncmp(ptt->entry[i], bSTREAMS, blength(bSTREAMS)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bSTREAMS, test->streams); + } + else if (bstrncmp(ptt->entry[i], bLOADS, blength(bLOADS)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bLOADS, test->loads); + } + else if (bstrncmp(ptt->entry[i], bSTORES, blength(bSTORES)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bSTORES, test->stores); + } + else if (bstrncmp(ptt->entry[i], bLOADSTORES, blength(bLOADSTORES)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bLOADSTORES, test->loadstores); + } + else if (bstrncmp(ptt->entry[i], bINSTCONST, blength(bINSTCONST)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bINSTCONST, test->instr_const); + } + else if (bstrncmp(ptt->entry[i], bINSTLOOP, blength(bINSTLOOP)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bINSTLOOP, test->instr_loop); + } + else if (bstrncmp(ptt->entry[i], bUOPS, blength(bUOPS)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bUOPS, test->uops); + } + else if (bstrncmp(ptt->entry[i], bBRANCHES, blength(bBRANCHES)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bBRANCHES, test->branches); + } + else if (bstrncmp(ptt->entry[i], bLOOP, blength(bLOOP)) == BSTR_OK) + { + ANALYSE_PTT_GET_INT(ptt->entry[i], bLOOP, test->stride); + bstrListAdd(code, ptt->entry[i]); + } + else if (bstrncmp(ptt->entry[i], bDESC, blength(bDESC)) == BSTR_OK) + { + test->desc = malloc((blength(ptt->entry[i])+2)*sizeof(char)); + if (test->desc) + { + int ret = snprintf(test->desc, blength(ptt->entry[i])+1, "%s", bdataofs(ptt->entry[i], blength(bDESC)+1)); + if (ret > 0) + { + test->desc[ret] = '\0'; + } + } + } + else if (bstrncmp(ptt->entry[i], bTYPE, blength(bTYPE)) == BSTR_OK) + { 
+ bstring btype = bmidstr(ptt->entry[i], blength(bTYPE)+1, blength(ptt->entry[i])-blength(bTYPE)); + btrimws(btype); + if (bstrncmp(btype, bTYPEDOUBLE, blength(bTYPEDOUBLE)) == BSTR_OK) + { + test->type = DOUBLE; + } + else if (bstrncmp(btype, bTYPESINGLE, blength(bTYPESINGLE)) == BSTR_OK) + { + test->type = SINGLE; + } + else if (bstrncmp(btype, bTYPEINT, blength(bTYPEINT)) == BSTR_OK) + { + test->type = INT; + } + else + { + fprintf(stderr, "Failed to determine type of benchmark\n"); + bdestroy(btype); + bstrListDestroy(code); + free(test); + test = NULL; + code = NULL; + break; + } + bdestroy(btype); + } + else + { + bstrListAdd(code, ptt->entry[i]); + } + } + *testcase = test; + } + bstrListDestroy(ptt); + } + + bdestroy(bBYTES); + bdestroy(bFLOPS); + bdestroy(bSTREAMS); + bdestroy(bTYPE); + bdestroy(bTYPEDOUBLE); + bdestroy(bTYPESINGLE); + bdestroy(bTYPEINT); + bdestroy(bDESC); + bdestroy(bLOADS); + bdestroy(bSTORES); + bdestroy(bLOADSTORES); + bdestroy(bINSTCONST); + bdestroy(bINSTLOOP); + bdestroy(bUOPS); + bdestroy(bBRANCHES); + bdestroy(bLOOP); + return code; +} + +static int set_testname(char *pttfile, TestCase* testcase) +{ + if ((!testcase)||(!pttfile)) + { + return -EINVAL; + } + bstring ptt = bfromcstr(basename(pttfile)); + int dot = bstrrchrp(ptt, '.', blength(ptt)-1); + btrunc(ptt, dot); + testcase->name = malloc((blength(ptt)+2) * sizeof(char)); + int ret = snprintf(testcase->name, blength(ptt)+1, "%s", bdata(ptt)); + if (ret > 0) + { + testcase->name[ret] = '\0'; + } + bdestroy(ptt); + return 0; +} + + +static struct bstrList* parse_asm(TestCase* testcase, struct bstrList* input) +{ + struct bstrList* output = NULL; + if (testcase && input) + { + + struct bstrList* pre = bstrListCreate(); + struct bstrList* loop = bstrListCreate(); + int got_loop = 0; + bstring bLOOP = bformat("LOOP"); + int step = testcase->stride; + + for (int i = 0; i < input->qty; i++) + { + if (bstrncmp(input->entry[i], bLOOP, blength(bLOOP)) == BSTR_OK) + { + got_loop = 1; + 
continue; + } + if (!got_loop) + { + bstrListAdd(pre, input->entry[i]); + } + else + { + bstrListAdd(loop, input->entry[i]); + } + } + bdestroy(bLOOP); + + output = bstrListCreate(); + + header(output, testcase->name); + + for (int i = 0; i < pre->qty; i++) + { + bstrListAdd(output, pre->entry[i]); + } + loopheader(output, "1", step); + for (int i = 0; i < loop->qty; i++) + { + bstrListAdd(output, loop->entry[i]); + } + loopfooter(output, "1", step); + + footer(output, testcase->name); + + bstrListDestroy(pre); + bstrListDestroy(loop); + } + return output; +} + +static int searchreplace(bstring line, RegisterMap* map) +{ + int maxlen = registerMapMaxPattern(map); + int size = registerMapLength(map); + for (int s = maxlen; s>= 1; s--) + { + int c = 0; + for (int j = 0; j < size; j++) + { + if (strlen(map[j].pattern) == s) + { + bstring pat = bfromcstr(map[j].pattern); + bstring reg = bfromcstr(map[j].reg); + bfindreplace(line, pat, reg, 0); + bdestroy(pat); + bdestroy(reg); + c++; + } + } + if (c == 0) + { + break; + } + } + return 0; +} + +static int prepare_code(struct bstrList* code) +{ + if (code) + { + for (int i = 0; i < code->qty; i++) + { + searchreplace(code->entry[i], StreamPatterns); + } + for (int i = 0; i < code->qty; i++) + { + searchreplace(code->entry[i], Registers); + } + for (int i = 0; i < code->qty; i++) + { + searchreplace(code->entry[i], Arguments); + } + for (int i = 0; i < code->qty; i++) + { + bstring pat = bfromcstr(Sptr.pattern); + bstring reg = bfromcstr(Sptr.reg); + bfindreplace(code->entry[i], pat, reg, 0); + bdestroy(pat); + bdestroy(reg); + } + for (int i = 0; i < code->qty; i++) + { + bstring pat = bfromcstr(Bptr.pattern); + bstring reg = bfromcstr(Bptr.reg); + bfindreplace(code->entry[i], pat, reg, 0); + bdestroy(pat); + bdestroy(reg); + } + } + return 0; +} + + +struct bstrList* dynbench_getall() +{ + int totalgroups = 0; + struct bstrList* list = NULL; + DIR *dp = NULL; + struct dirent *ep = NULL; + DIR * (*ownopendir)(const char* 
folder) = &opendir; + int (*ownaccess)(const char*, int) = &access; + + bstring path = bformat("%s/.likwid/bench/%s", getenv("HOME"), ARCHNAME); + + if (!ownaccess(bdata(path), R_OK|X_OK)) + { + dp = ownopendir(bdata(path)); + if (dp != NULL) + { + while (ep = readdir(dp)) + { + if (strncmp(&(ep->d_name[strlen(ep->d_name)-4]), ".ptt", 4) == 0) + { + if (!list) list = bstrListCreate(); + totalgroups++; + bstring dname = bfromcstr(ep->d_name); + btrunc(dname, blength(dname)-4); + bstrListAdd(list, dname); + bdestroy(dname); + } + } + closedir(dp); + } + else + { + fprintf(stderr, "Failed to enter folder %s\n", bdata(path)); + } + } + bdestroy(path); + return list; +} + + +static bstring get_compiler(bstring candidates) +{ + bstring compiler = NULL; + bstring path = bfromcstr(getenv("PATH")); + struct bstrList *plist = NULL; + struct bstrList *clist = NULL; + int (*ownaccess)(const char*, int) = access; + + plist = bsplit(path, ':'); + clist = bsplit(candidates, ','); + + for (int i = 0; i < plist->qty && (!compiler); i++) + { + for (int j = 0; j < clist->qty && (!compiler); j++) + { + bstring tmp = bformat("%s/%s", bdata(plist->entry[i]), bdata(clist->entry[j])); + if (!ownaccess(bdata(tmp), R_OK|X_OK)) + { + compiler = bstrcpy(tmp); + } + bdestroy(tmp); + } + } + bdestroy(path); + bstrListDestroy(plist); + bstrListDestroy(clist); + return compiler; +} + +static int compile_file(bstring compiler, bstring flags, bstring asmfile, bstring objfile) +{ + if (blength(compiler) == 0 || blength(asmfile) == 0) + return -1; + char buf[1024]; + bstring bstdout = bfromcstr(""); + + + bstring cmd = bformat("%s %s %s -o %s", bdata(compiler), bdata(flags), bdata(asmfile), bdata(objfile)); + + FILE * fp = popen(bdata(cmd), "r"); + if (fp) + { + for (;;) { + /* Read another chunk */ + int ret = fread(buf, 1, sizeof(buf), fp); + if (ret < 0) { + fprintf(stderr, "fread(%p, 1, %lu, %p): %d, errno=%d\n", buf, sizeof(buf), fp, ret, errno); + bdestroy(cmd); + bdestroy(bstdout); + return 
-1; + } + else if (ret == 0) { + break; + } + bcatblk(bstdout, buf, ret); + } + if (blength(bstdout) > 0) + { + fprintf(stderr, "%s\n", bdata(bstdout)); + } + pclose(fp); + } + bdestroy(cmd); + bdestroy(bstdout); + + return 0; +} + + +static int open_function(bstring location, TestCase *testcase) +{ + void* handle; + char *error; + void* (*owndlsym)(void*, const char*) = dlsym; + + dlerror(); + testcase->dlhandle = dlopen(bdata(location), RTLD_LAZY); + if (!testcase->dlhandle) { + fprintf(stderr, "Error opening location %s: %s\n", bdata(location), dlerror()); + return -1; + } + dlerror(); + testcase->kernel = owndlsym(testcase->dlhandle, testcase->name); + if ((error = dlerror()) != NULL) { + dlclose(testcase->dlhandle); + fprintf(stderr, "Error opening function %s: %s\n", testcase->name, error); + return -1; + } + dlerror(); + + return 0; +} + + +int dynbench_test(bstring testname) +{ + int exist = 0; + char* home = getenv("HOME"); + if (!home) + { + fprintf(stderr, "Failed to get $HOME from environment\n"); + return exist; + } + bstring path = bformat("%s/.likwid/bench/%s/%s.ptt", home, ARCHNAME, bdata(testname)); + if (!access(bdata(path), R_OK)) + { + exist = 1; + } + bdestroy(path); + return exist; +} + +int dynbench_load(bstring testname, TestCase **testcase, char* tmpfolder, char *compilers, char* compileflags) +{ + int err = -1; + TestCase *test = NULL; + char* home = getenv("HOME"); + if (!home) + { + fprintf(stderr, "Failed to get $HOME from environment\n"); + return err; + } + bstring pttfile = bformat("%s/.likwid/bench/%s/%s.ptt", home, ARCHNAME, bdata(testname)); + if (!access(bdata(pttfile), R_OK)) + { + struct bstrList* code = analyse_ptt(pttfile, &test); + if (code && test) + { + test->dlhandle = NULL; + test->kernel = NULL; + test->name = malloc((blength(testname)+2) * sizeof(char)); + if (test->name) + { + int ret = snprintf(test->name, blength(testname)+1, "%s", bdata(testname)); + if (ret > 0) + { + test->name[ret] = '\0'; + } + if (tmpfolder && 
compilers) + { + pid_t pid = getpid(); + bstring buildfolder = bformat("%s/%ld", tmpfolder, pid); + if (mkdir(bdata(buildfolder), 0700) == 0) + { + int asm_written = 0; + bstring asmfile = bformat("%s/%ld/%s.S", tmpfolder , pid, bdata(testname)); + + struct bstrList* asmb = parse_asm(test, code); + if (asmb) + { + prepare_code(asmb); + if (write_asm(asmfile, asmb) != 0) + { + fprintf(stderr, "Failed to write assembly to file %s\n", bdata(asmfile)); + } + else + { + asm_written = 1; + } + bstrListDestroy(asmb); + } + else + { + fprintf(stderr, "Cannot parse assembly\n"); + } + + bstring candidates = bfromcstr(compilers); + bstring compiler = get_compiler(candidates); + if (asm_written && compiler) + { + int cret = 0; + bstring cflags; + if (compileflags) + { + cflags = bfromcstr(compileflags); + } + else + { + cflags = bfromcstr(""); + } + bstring objfile = bformat("%s/%ld/%s.o", tmpfolder , pid, bdata(testname)); + cret = compile_file(compiler, cflags, asmfile, objfile); + if (cret == 0) + { + cret = open_function(objfile, test); + if (cret == 0) + { + err = 0; + *testcase = test; + } + else + { + fprintf(stderr, "Cannot load function %s from %s\n", bdata(testname), bdata(objfile)); + } + } + else + { + fprintf(stderr, "Cannot compile file %s to %s\n", bdata(asmfile), bdata(objfile)); + } + bdestroy(cflags); + bdestroy(objfile); + } + else + { + fprintf(stderr, "Cannot find any compiler %s\n", bdata(buildfolder)); + } + bdestroy(candidates); + bdestroy(compiler); + bdestroy(asmfile); + + + } + else + { + fprintf(stderr, "Cannot create temporary directory %s\n", bdata(buildfolder)); + err = errno; + } + bdestroy(buildfolder); + } + else + { + err = 0; + *testcase = test; + } + } + else + { + fprintf(stderr, "Failed to allocate space for the testname\n"); + } + bstrListDestroy(code); + + } + else + { + fprintf(stderr, "Cannot read ptt file %s\n", bdata(pttfile)); + } + + } + else + { + fprintf(stderr, "Cannot open ptt file %s\n", bdata(pttfile)); + } + 
bdestroy(pttfile); + + return err; +} + +int dynbench_close(TestCase* testcase, char* tmpfolder) +{ + if (testcase) + { + if (testcase->dlhandle) + { + dlclose(testcase->dlhandle); + testcase->dlhandle = NULL; + testcase->kernel = NULL; + } + if (tmpfolder) + { + pid_t pid = getpid(); + + bstring buildfolder = bformat("%s/%ld", tmpfolder, pid); + bstring asmfile = bformat("%s/%s.S", bdata(buildfolder), testcase->name); + bstring objfile = bformat("%s/%s.o", bdata(buildfolder), testcase->name); + + if (!access(bdata(asmfile), R_OK)) unlink(bdata(asmfile)); + if (!access(bdata(objfile), R_OK)) unlink(bdata(objfile)); + if (!access(bdata(buildfolder), R_OK)) rmdir(bdata(buildfolder)); + + bdestroy(asmfile); + bdestroy(objfile); + bdestroy(buildfolder); + } + free(testcase->name); + testcase->name = NULL; + free(testcase->desc); + testcase->desc = NULL; + free(testcase); + testcase = NULL; + } + return 0; +} diff --git a/bench/src/strUtil.c b/bench/src/strUtil.c index 017d4b617..0f7949d5d 100644 --- a/bench/src/strUtil.c +++ b/bench/src/strUtil.c @@ -225,6 +225,11 @@ parse_streams(Workgroup* group, const_bstring str, int numberOfStreams) { struct bstrList* tokens; struct bstrList* subtokens; + if (group->init_per_thread) + { + fprintf(stderr, "Error: Cannot place stream in different stream when initialization per thread is selected.\n"); + return -1; + } tokens = bsplit(str,','); if (tokens->qty < numberOfStreams) @@ -297,8 +302,13 @@ bstr_to_workgroup(Workgroup* group, const_bstring str, DataType type, int number bstrListDestroy(tokens); return 1; } - parse_streams(group, tokens->entry[1], numberOfStreams); + parseStreams = parse_streams(group, tokens->entry[1], numberOfStreams); bdestroy(domain); + if (parseStreams) + { + bstrListDestroy(tokens); + return parseStreams; + } } else if (tokens->qty == 1) { diff --git a/bench/src/threads.c b/bench/src/threads.c index 8e99a77e6..b6b4f8ffa 100644 --- a/bench/src/threads.c +++ b/bench/src/threads.c @@ -37,6 +37,7 @@ 
#include #include +#include /* ##### EXPORTED VARIABLES ########################################### */ @@ -135,22 +136,12 @@ threads_create(void *(*startRoutine)(void*)) } void -threads_createGroups(int numberOfGroups) +threads_createGroups(int numberOfGroups, Workgroup *groups) { int i; int j; - int numThreadsPerGroup; int globalId = 0; - if (numThreads % numberOfGroups) - { - fprintf(stderr, "ERROR: Not enough threads %d to create %d groups\n",numThreads,numberOfGroups); - } - else - { - numThreadsPerGroup = numThreads / numberOfGroups; - } - threads_groups = (ThreadGroup*) malloc(numberOfGroups * sizeof(ThreadGroup)); if (!threads_groups) { @@ -160,20 +151,20 @@ threads_createGroups(int numberOfGroups) for (i = 0; i < numberOfGroups; i++) { - threads_groups[i].numberOfThreads = numThreadsPerGroup; - threads_groups[i].threadIds = (int*) malloc(numThreadsPerGroup * sizeof(int)); + threads_groups[i].numberOfThreads = groups[i].numberOfThreads; + threads_groups[i].threadIds = (int*) malloc(threads_groups[i].numberOfThreads * sizeof(int)); if (!threads_groups[i].threadIds) { fprintf(stderr, "ERROR: Cannot allocate threadID list for thread groups - %s\n", strerror(errno)); exit(EXIT_FAILURE); } - for (j = 0; j < numThreadsPerGroup; j++) + for (j = 0; j < threads_groups[i].numberOfThreads; j++) { threads_data[globalId].threadId = j; threads_data[globalId].groupId = i; threads_data[globalId].numberOfGroups = numberOfGroups; - threads_data[globalId].numberOfThreads = numThreadsPerGroup; + threads_data[globalId].numberOfThreads = threads_groups[i].numberOfThreads; threads_groups[i].threadIds[j] = globalId++; } } diff --git a/bench/x86-64/stream_mem_avx512.ptt b/bench/x86-64/stream_mem_avx512.ptt new file mode 100644 index 000000000..b72ddedd8 --- /dev/null +++ b/bench/x86-64/stream_mem_avx512.ptt @@ -0,0 +1,29 @@ +STREAMS 3 +TYPE DOUBLE +FLOPS 2 +BYTES 24 +DESC Double-precision stream triad A(i) = B(i)*c + C(i), uses AVX-512 and non-temporal stores +LOADS 2 +STORES 1 
+INSTR_CONST 17 +INSTR_LOOP 19 +UOPS 26 +vmovapd zmm5, [rip+SCALAR] +LOOP 32 +vmovapd zmm1, [STR1 + GPR1*8] +vmovapd zmm2, [STR1 + GPR1*8+64] +vmovapd zmm3, [STR1 + GPR1*8+128] +vmovapd zmm4, [STR1 + GPR1*8+192] +vmulpd zmm1, zmm1, zmm5 +vaddpd zmm1, zmm1, [STR2 + GPR1*8] +vmulpd zmm2, zmm2, zmm5 +vaddpd zmm2, zmm2, [STR2 + GPR1*8+64] +vmulpd zmm3, zmm3, zmm5 +vaddpd zmm3, zmm3, [STR2 + GPR1*8+128] +vmulpd zmm4, zmm4, zmm5 +vaddpd zmm4, zmm4, [STR2 + GPR1*8+192] +vmovntpd [STR0 + GPR1*8] , zmm1 +vmovntpd [STR0 + GPR1*8+64], zmm2 +vmovntpd [STR0 + GPR1*8+128], zmm3 +vmovntpd [STR0 + GPR1*8+192], zmm4 + diff --git a/config.mk b/config.mk index 82d2969a9..b566e22bf 100644 --- a/config.mk +++ b/config.mk @@ -6,7 +6,8 @@ # Please have a look in INSTALL and the WIKI for details on # configuration options setup steps. -# supported: GCC, CLANG, ICC, MIC (ICC), GCCX86 (for 32bit systems) +# Supported: GCC, CLANG, ICC, MIC (ICC), GCCX86 (for 32bit systems) +# GCCARMv8, GCCARMv7 and GCCPOWER COMPILER = GCC#NO SPACE # Path were to install likwid @@ -57,16 +58,25 @@ INSTALLED_PREFIX ?= $(PREFIX)#NO SPACE INSTALLED_BINPREFIX = $(INSTALLED_PREFIX)/bin#NO SPACE INSTALLED_LIBPREFIX = $(INSTALLED_PREFIX)/lib#NO SPACE +# Build the accessDaemon. Have a look in the WIKI for details. +BUILDDAEMON = true#NO SPACE # For the daemon based secure msr/pci access configure # the absolute path to the msr daemon executable. ACCESSDAEMON = $(PREFIX)/sbin/likwid-accessD#NO SPACE INSTALLED_ACCESSDAEMON = $(INSTALLED_PREFIX)/sbin/likwid-accessD#NO SPACE -# Build the accessDaemon. Have a look in the WIKI for details. -BUILDDAEMON = true#NO SPACE # Build the setFrequencies daemon to allow users setting the CPU and Uncore # frequency BUILDFREQ = true#NO SPACE +# Paths for the frequency daemon after installation +FREQDAEMON = $(PREFIX)/sbin/likwid-setFreq#NO SPACE +INSTALLED_FREQDAEMON = $(INSTALLED_PREFIX)/sbin/likwid-setFreq#NO SPACE + +# Build the appDaemon. 
It's not really a daemon but an LD_PRELOAD library +# It is required to get access to the application context. +BUILDAPPDAEMON=true +APPDAEMON = $(PREFIX)/lib/likwid-appDaemon.so#NO SPACE +INSTALLED_APPDAEMON = $(INSTALLED_PREFIX)/lib/likwid-appDaemon.so#NO SPACE # chown installed tools to this user/group # if you change anything here, make sure that the user/group can access diff --git a/doc/likwid-agent.1 b/doc/likwid-agent.1 deleted file mode 100644 index f50dbca45..000000000 --- a/doc/likwid-agent.1 +++ /dev/null @@ -1,94 +0,0 @@ -.TH LIKWID-AGENT 1 likwid\-VERSION -.SH NAME -likwid-agent \- monitoring daemon for hardware performance counters -.SH SYNOPSIS -.B likwid-agent -.SH DESCRIPTION -.B likwid-agent -is a daemon application that uses -.B likwid-perfctr(1) -to measure hardware performance counters. The basic configuration is in a global configuration file. The configuration of the hardware event sets is done with extra files suitable for each architecture. Besides the hardware event configuration, the raw data can be transformed using formulas to interested metrics. In order to output to much data, the data can be further filtered or aggregated. -.B likwid-agent -provides multiple store backends like logfiles, RRD (Round Robin Database) or gmetric (Ganglia Monitoring System). - -.SH CONFIG FILE -The global configuration file has the following options: -.TP -.B GROUPPATH -Path to the group files containing event set and output defintitions. See section -.B GROUP FILES -for information. -.TP -.B EVENTSET ... -Space separated list of groups (without .txt) that should be monitored. -.TP -.B DURATION