From cddedb6fac5fb2a2ebc26624b14fb03ad04a325e Mon Sep 17 00:00:00 2001
From: Daniel <67126972+ddaniel27@users.noreply.github.com>
Date: Tue, 25 Jun 2024 03:11:14 -0500
Subject: [PATCH] [IMPLEMENTATION] Compressor RLE (#726)

* [NEW IMPLEMENTATION] RLE compression algorithm

* [FIX] Fix typo

* [FIX] Suggestion added
---
 compression/rlecoding.go      | 123 +++++++++++++++++++++
 compression/rlecoding_test.go | 196 ++++++++++++++++++++++++++++++++++
 2 files changed, 319 insertions(+)
 create mode 100644 compression/rlecoding.go
 create mode 100644 compression/rlecoding_test.go

diff --git a/compression/rlecoding.go b/compression/rlecoding.go
new file mode 100644
index 000000000..c08d3e4bd
--- /dev/null
+++ b/compression/rlecoding.go
@@ -0,0 +1,123 @@
+/*
+rlecoding.go
+description: run length encoding and decoding
+details:
+Run-length encoding (RLE) is a simple form of data compression in which runs of data are stored as a single data value and count, rather than as the original run. This is useful when the data contains many repeated values. For example, the data "WWWWWWWWWWWWBWWWWWWWWWWWWBBB" can be compressed to "12W1B12W3B". The algorithm is simple and can be implemented in a few lines of code.
+author(s) [ddaniel27](https://github.com/ddaniel27)
+*/
+package compression
+
+import (
+	"bytes"
+	"strconv"
+	"strings"
+	"unicode/utf8"
+)
+
+// RLEncode returns the run-length encoding of data: every run of identical
+// characters is written as its decimal length followed by the character, so
+// "AAAB" becomes "3A1B" and the empty string stays empty.
+//
+// Runs are measured in Unicode code points rather than bytes, so multi-byte
+// characters survive encoding; bytes that are not valid UTF-8 are replaced by
+// U+FFFD. The textual format cannot tell a count from a digit symbol, so input
+// that contains decimal digits is not guaranteed to round-trip via RLEdecode.
+func RLEncode(data string) string {
+	runes := []rune(data)
+	var sb strings.Builder
+
+	for start := 0; start < len(runes); {
+		end := start + 1
+		for end < len(runes) && runes[end] == runes[start] {
+			end++
+		}
+		// Emit the run as its length followed by the character itself.
+		sb.WriteString(strconv.Itoa(end - start))
+		sb.WriteRune(runes[start])
+		start = end
+	}
+
+	return sb.String()
+}
+
+// RLEdecode expands a run-length encoded string, as produced by RLEncode,
+// back into the original text. The input is read as a sequence of pairs, each
+// a decimal count followed by one symbol; the symbol may be any character
+// (letters, spaces, punctuation, multi-byte UTF-8, ...).
+//
+// Decoding is lenient: characters that are not part of a pair are skipped, a
+// pair whose count does not fit in an int is dropped, and a digit run that
+// ends the input uses its last digit as the symbol (so "12" decodes to "2").
+func RLEdecode(data string) string {
+	var sb strings.Builder
+
+	for i := 0; i < len(data); {
+		if data[i] < '0' || data[i] > '9' {
+			i++ // stray byte that cannot start a count
+			continue
+		}
+
+		digitsStart := i
+		for i < len(data) && data[i] >= '0' && data[i] <= '9' {
+			i++
+		}
+		digits := data[digitsStart:i]
+
+		var symbol string
+		switch {
+		case i < len(data):
+			_, size := utf8.DecodeRuneInString(data[i:])
+			symbol = data[i : i+size]
+			i += size
+		case len(digits) > 1:
+			// Only digits are left: the final digit is itself the symbol.
+			symbol = digits[len(digits)-1:]
+			digits = digits[:len(digits)-1]
+		default:
+			// A lone trailing digit has no symbol to repeat.
+			return sb.String()
+		}
+
+		count, err := strconv.Atoi(digits)
+		if err != nil {
+			continue // count overflows int; there is no error return to report it
+		}
+		sb.WriteString(strings.Repeat(symbol, count))
+	}
+
+	return sb.String()
+}
+
+// RLEncodebytes returns the run-length encoding of data as a flat sequence of
+// (count, value) byte pairs. A count is stored in a single byte, so runs longer
+// than 255 are split into several consecutive pairs instead of wrapping around.
+// It returns nil for empty input.
+func RLEncodebytes(data []byte) []byte {
+	const maxRun = 255 // largest count a single byte can hold
+
+	var result []byte
+	for i := 0; i < len(data); {
+		run := 1
+		for run < maxRun && i+run < len(data) && data[i+run] == data[i] {
+			run++
+		}
+		result = append(result, byte(run), data[i])
+		i += run
+	}
+
+	return result
+}
+
+// RLEdecodebytes expands a sequence of (count, value) byte pairs, as produced
+// by RLEncodebytes, back into the original bytes. A trailing byte without a
+// partner is ignored rather than causing an out-of-range panic.
+func RLEdecodebytes(data []byte) []byte {
+	var result []byte
+
+	for i := 0; i+1 < len(data); i += 2 {
+		count := int(data[i])
+		result = append(result, bytes.Repeat(data[i+1:i+2], count)...)
+	}
+
+	return result
+}
diff --git a/compression/rlecoding_test.go b/compression/rlecoding_test.go
new file mode 100644
index 000000000..bf9af6bf6
--- /dev/null
+++ b/compression/rlecoding_test.go
@@ -0,0 +1,196 @@
+package compression_test
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/TheAlgorithms/Go/compression"
+)
+
+func TestCompressionRLEncode(t *testing.T) {
+	tests := []struct {
+		name string
+		data string
+		want string
+	}{
+		{
+			name: "test 1",
+			data: "WWWWWWWWWWWWBWWWWWWWWWWWWBBB",
+			want: "12W1B12W3B",
+		},
+		{
+			name: "test 2",
+			data: "AABCCCDEEEE",
+			want: "2A1B3C1D4E",
+		},
+		{
+			name: "test 3",
+			data: "AAAABBBCCDA",
+			want: "4A3B2C1D1A",
+		},
+		{
+			name: "empty input",
+			data: "",
+			want: "",
+		},
+		{
+			name: "multi-byte characters",
+			data: "\u00e9\u00e9\u00e9\u4e16\u4e16",
+			want: "3\u00e92\u4e16",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := compression.RLEncode(tt.data); got != tt.want {
+				t.Errorf("RLEncode() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestCompressionRLEDecode(t *testing.T) {
+	tests := []struct {
+		name string
+		data string
+		want string
+	}{
+		{
+			name: "test 1",
+			data: "12W1B12W3B",
+			want: "WWWWWWWWWWWWBWWWWWWWWWWWWBBB",
+		},
+		{
+			name: "test 2",
+			data: "2A1B3C1D4E",
+			want: "AABCCCDEEEE",
+		},
+		{
+			name: "test 3",
+			data: "4A3B2C1D1A",
+			want: "AAAABBBCCDA",
+		},
+		{
+			name: "non-word symbols",
+			data: "3 2!1-",
+			want: "   !!-",
+		},
+		{
+			name: "multi-byte characters",
+			data: "3\u00e92\u4e16",
+			want: "\u00e9\u00e9\u00e9\u4e16\u4e16",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := compression.RLEdecode(tt.data); got != tt.want {
+				t.Errorf("RLEdecode() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestCompressionRLEncodeBytes(t *testing.T) {
+	tests := []struct {
+		name string
+		data []byte
+		want []byte
+	}{
+		{
+			name: "test 1",
+			data: []byte("WWWWWWWWWWWWBWWWWWWWWWWWWBBB"),
+			want: []byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'},
+		},
+		{
+			name: "test 2",
+			data: []byte("AABCCCDEEEE"),
+			want: []byte{2, 'A', 1, 'B', 3, 'C', 1, 'D', 4, 'E'},
+		},
+		{
+			name: "test 3",
+			data: []byte("AAAABBBCCDA"),
+			want: []byte{4, 'A', 3, 'B', 2, 'C', 1, 'D', 1, 'A'},
+		},
+		{
+			name: "run longer than 255",
+			data: bytes.Repeat([]byte{'A'}, 300),
+			want: []byte{255, 'A', 45, 'A'},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := compression.RLEncodebytes(tt.data); !bytes.Equal(got, tt.want) {
+				t.Errorf("RLEncodebytes() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestCompressionRLEDecodeBytes(t *testing.T) {
+	tests := []struct {
+		name string
+		data []byte
+		want []byte
+	}{
+		{
+			name: "test 1",
+			data: []byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'},
+			want: []byte("WWWWWWWWWWWWBWWWWWWWWWWWWBBB"),
+		},
+		{
+			name: "test 2",
+			data: []byte{2, 'A', 1, 'B', 3, 'C', 1, 'D', 4, 'E'},
+			want: []byte("AABCCCDEEEE"),
+		},
+		{
+			name: "test 3",
+			data: []byte{4, 'A', 3, 'B', 2, 'C', 1, 'D', 1, 'A'},
+			want: []byte("AAAABBBCCDA"),
+		},
+		{
+			name: "split long run",
+			data: []byte{255, 'A', 45, 'A'},
+			want: bytes.Repeat([]byte{'A'}, 300),
+		},
+		{
+			name: "trailing unpaired byte",
+			data: []byte{2, 'A', 7},
+			want: []byte("AA"),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := compression.RLEdecodebytes(tt.data); !bytes.Equal(got, tt.want) {
+				t.Errorf("RLEdecodebytes() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+/* --- BENCHMARKS --- */
+func BenchmarkRLEncode(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = compression.RLEncode("WWWWWWWWWWWWBWWWWWWWWWWWWBBB")
+	}
+}
+
+func BenchmarkRLEDecode(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = compression.RLEdecode("12W1B12W3B")
+	}
+}
+
+func BenchmarkRLEncodeBytes(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = compression.RLEncodebytes([]byte("WWWWWWWWWWWWBWWWWWWWWWWWWBBB"))
+	}
+}
+
+func BenchmarkRLEDecodeBytes(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = compression.RLEdecodebytes([]byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'})
+	}
+}