-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsql_split.rb
148 lines (118 loc) · 3.89 KB
/
sql_split.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# coding: utf-8
# frozen_string_literal: true
require 'clamp'
require 'byebug'
require 'fileutils'
require 'ruby-progressbar'
Clamp do
# -l/--lines: number of inserts to put into each output file (parsed as an Integer).
option(%w[-l --lines], :lines, 'number of inserts to put into the file', default: nil) do |raw_lines|
  Integer(raw_lines)
end

# -s/--size: maximum size of each output file, given in MB and stored in bytes.
option(%w[-s --size], :size, 'size for each output to be as a max, in MB', default: nil) do |megabytes|
  Integer(megabytes) * 1_000_000
end

# -a/--archive: boolean flag, read back through `archive?`.
option(%w[-a --archive], :flag, 'for archives')

# Positional argument: path of the SQL file to split, exposed as `file_name`.
parameter('FILE', 'the sql file to split', attribute_name: :file_name)
# Command entry point: splits the SQL dump at `file_name` into several files
# written to an "output" directory next to the input file.
#
# Exits with status 1 when the input file does not exist and with status 2
# when no header (i.e. no INSERT statement) could be read from it. On success
# prints the elapsed time, the number of rows processed and the number of
# files written.
def execute
  unless File.exist?(file_name)
    puts "❌ Error: File #{file_name} not found"
    exit(1)
  end

  # Start from an empty output directory (any previous one is removed).
  @directory = "#{File.dirname(file_name)}/output"
  FileUtils.remove_dir(@directory, true)
  FileUtils.mkdir(@directory)

  # Block form guarantees the input handle is closed, including on `exit`.
  File.open(file_name, 'r') do |file|
    header = get_file_header(file, archive?)
    if header.nil?
      # Print first, then exit. Written as `puts '...' && exit(2)` the exit
      # is evaluated as part of the argument, before anything is printed.
      puts '❌ Error: No sql INSERT commands found'
      exit(2)
    end

    # total: nil => the progress bar runs without a known end.
    @progressbar = ProgressBar.create(starting_at: 0,
                                      total: nil,
                                      throttle_rate: 0.1)
    @total_files = 0

    start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    total = split_into_lines(file, lines, header, size)
    @progressbar.finish
    elapsed_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time

    puts 'DONE!!!! 👾 🙌 🎉 🥂 🎈 🍾 🐝 🔥'
    puts "Total time: #{elapsed_time}"
    puts "Total lines processed: #{total}"
    puts "Number of files: #{@total_files}"
  end
end
# Collects everything in +file+ that precedes the first line starting with
# "INSERT INTO" and returns it as the header to repeat in every output file.
#
# The stream is left positioned at the start of that first INSERT line, so
# the caller reads it as data instead of losing it.
#
# @param file [IO] readable, seekable stream (uses #pos / #pos=)
# @param archive [Boolean] when truthy, a "LOCK TABLES `user_emails` WRITE;"
#   line is appended after each line starting with "/*!40000 ALTER TABLE"
# @return [String, nil] the header text (possibly empty), or nil when the
#   stream contains no line starting with "INSERT INTO"
def get_file_header(file, archive)
  header = +''
  until file.eof?
    line_start = file.pos
    line = file.readline

    if line.start_with?('INSERT INTO')
      # Rewind to the beginning of the INSERT so it is not swallowed here.
      file.pos = line_start
      return header
    end

    header << line
    if archive && line.start_with?('/*!40000 ALTER TABLE')
      header << "LOCK TABLES `user_emails` WRITE;\n"
    end
  end

  # Reached the end without seeing an INSERT.
  nil
end
# Reads +input_file+ line by line, breaks every multi-row INSERT into one
# single-row INSERT statement per row, and writes those statements to a
# sequence of output files obtained from start_file / reset_file.
#
# A row is recognised by the pattern "(<digits>,'" — i.e. an integer first
# column followed by a quoted second column. Every emitted statement targets
# the `user_emails` table regardless of the table named in the input.
# Lines containing no such row are copied to the current output file as-is.
#
# A new output file is started before writing a row when the current file
# already holds +number_of_lines+ rows, or when its size exceeds +size+.
#
# @param input_file [IO] stream positioned at the first line to process
# @param number_of_lines [Integer, nil] max rows per output file; nil = no limit
# @param header [String] text written at the top of every output file
# @param size [Integer, nil] size threshold in bytes; nil = no limit
# @return [Integer] number of rows found in the input
def split_into_lines(input_file, number_of_lines, header, size)
  insert_prefix = 'INSERT INTO `user_emails` VALUES '
  line_count = 0
  file_count = 0
  total_lines = 0
  output_file = start_file(file_count, header)

  until input_file.eof?
    line = input_file.readline
    # NOTE(review): decoding from 'binary' with undef: :replace looks like it
    # drops every non-ASCII byte, not only invalid sequences — confirm intended.
    line.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')

    # The capture group keeps each "(id,'" marker as its own array element:
    # [text before first row, id, rest, id, rest, ...].
    pieces = line.split(/(\([0-9]+,')/)

    # A line without any row marker splits into a single element (never into
    # an empty array), so that is the "no matches" case: pass it through.
    if pieces.size <= 1
      output_file.write line
      next
    end

    pieces.shift # drop the text in front of the first row
    pieces.each_slice(2) do |id_block, rest|
      next if rest.nil? # dangling marker with nothing after it

      # Rotate once per row, before writing it, so a row is never split
      # across files and each file holds at most number_of_lines rows.
      if (number_of_lines && line_count >= number_of_lines) ||
         (size && output_file.size > size)
        output_file = reset_file(output_file, file_count += 1, header)
        line_count = 0
      end

      # Strip the line terminator first; otherwise the last row of a line
      # ("...);\n") is not seen as terminated and gains a stray ";" line.
      row = (id_block + rest).chomp
      row = "#{row.delete_suffix(',')};" unless row.end_with?(';')

      output_file.write "#{insert_prefix}#{row}\n"
      line_count += 1
    end

    @progressbar.log "File Count: #{file_count}"
    total_lines += pieces.size / 2
    @progressbar.increment
  end

  total_lines
ensure
  # Close the last output file even when an error interrupts the split.
  output_file.close if output_file && !output_file.closed?
end
# Opens output file number +count+ ("x_<count>.sql" inside @directory) for
# writing, writes +header+ to it and bumps the @total_files counter.
#
# @param count [Integer] index used in the output file name
# @param header [String] text written at the top of the new file
# @return [File] the newly opened file, still open for writing
def start_file(count, header)
  @total_files += 1
  path = "#{@directory}/x_#{count}.sql"
  File.new(path, 'w').tap { |out| out.write(header) }
end
# Finishes the current output file and opens the next one.
#
# @param file [#close] the output file to close
# @param count [Integer] index of the file to start
# @param header [String] header passed on to start_file
# @return whatever start_file returns for the new file
def reset_file(file, count, header)
  file.close
  replacement = start_file(count, header)
  replacement
end
end