-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpreprocess_tess_img.py
155 lines (106 loc) · 5.36 KB
/
preprocess_tess_img.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import numpy as np
import matplotlib.pyplot as pp
import math
import cv2
import pytesseract
def _remove_vertical_lines(img, kernel_height):
    """Paint over structures found by a vertical opening, then repair gaps.

    Steps, in order:
      1. Morphologically open ``img`` with a ``1 x kernel_height`` rectangle
         (two iterations).
      2. Find the external contours of the opened image and draw each one
         onto ``img`` in white with a thickness of 2.
      3. Invert the image, close it with a ``3 x 1`` rectangle, and invert
         it back.

    Args:
        img: Single-channel uint8 image. It is modified in place by the
            contour drawing in step 2.
        kernel_height: Height in pixels of the 1-pixel-wide opening kernel.

    Returns:
        A new array holding the result of step 3.
    """
    vertical_kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (1, kernel_height))
    detected_lines = cv2.morphologyEx(
        img, cv2.MORPH_OPEN, vertical_kernel, iterations=2)

    # findContours returns (contours, hierarchy) in some OpenCV versions and
    # (image, contours, hierarchy) in others; pick the contour list either way.
    found = cv2.findContours(
        detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]
    for contour in contours:
        cv2.drawContours(img, [contour], -1, (255, 255, 255), 2)

    # Repair: close horizontally on the inverted image so the operation acts
    # on the dark pixels, then invert back.
    repair_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
    return 255 - cv2.morphologyEx(
        255 - img, cv2.MORPH_CLOSE, repair_kernel, iterations=1)


def preprocess_img(img):
    """Binarise an image, strip vertical lines and save each stage to disk.

    Three files are written to the current working directory:

    * ``preprocess.png``  - grayscale + Otsu threshold + 4x7 closing.
      Author's note: letters near the vertical line come out thicker here,
      which is considered good.
    * ``preprocess2.png`` - after two vertical-line removal passes (kernel
      heights 2 and 5). Author's note: this heavily thins the vertical line
      but also all letters, which is considered bad.
    * ``preprocess3.png`` - after a 3x3 median blur and one erosion, an
      attempt to restore letter thickness.

    Args:
        img: Anything ``np.array(img, dtype=np.uint8)`` accepts. The result
            is passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``, so a
            3-channel image is presumably expected.
            NOTE(review): PIL/pyautogui images are RGB rather than BGR, so
            the grayscale channel weights may be swapped for that input -
            confirm whether this matters.

    Returns:
        The final processed image (the one saved as ``preprocess3.png``).
        Earlier versions returned ``None``; callers that ignore the result
        are unaffected.
    """
    # Convert pyautogui/PIL input to the numpy array format OpenCV works on.
    img = np.array(img, dtype=np.uint8)

    # Collapse to a single channel, then binarise. THRESH_OTSU picks the
    # threshold itself, so the 0 passed here is ignored (a fixed threshold
    # of 105 was tried previously).
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 7))
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, close_kernel)
    cv2.imwrite('preprocess.png', img)

    # Two removal passes with increasing kernel height.
    img = _remove_vertical_lines(img, 2)
    img = _remove_vertical_lines(img, 5)
    cv2.imwrite('preprocess2.png', img)

    img = cv2.medianBlur(img, 3)
    # NOTE(review): the original defined an unused 1x1 `kernal` right before
    # this call but actually eroded with the 4x7 closing kernel. That
    # behaviour is kept; confirm the 4x7 kernel is the intended one.
    img = cv2.erode(img, close_kernel, iterations=1)
    cv2.imwrite('preprocess3.png', img)

    return img
# image = img
# blur = cv2.GaussianBlur(image, (5,5), 0)
# thresh = cv2.threshold(blur, 130, 255, cv2.THRESH_BINARY_INV)[1]
# vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,5))
# horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,1))
# remove_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel)
# remove_vertical = cv2.morphologyEx(remove_horizontal, cv2.MORPH_OPEN, horizontal_kernel)
# cnts = cv2.findContours(remove_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# cnts = cnts[0] if len(cnts) == 2 else cnts[1]
# mask = np.ones(image.shape, dtype=np.uint8)
# for c in cnts:
# area = cv2.contourArea(c)
# if area > 50:
# cv2.drawContours(mask, [c], -1, (255,255,255), -1)
# kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
# mask = cv2.dilate(mask, kernel, iterations=1)
# image = 255 - image
# result = 255 - cv2.bitwise_and(mask, image)
# img = result
# img = cv2.blur(img,(3,3))
# _,img = cv2.threshold(img,50,255,cv2.THRESH_BINARY)
# img = cv2.blur(img,(3,3))
# _,img = cv2.threshold(img,50,255,cv2.THRESH_BINARY)
#img = cv2.blur(img,(2,2))
#_, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY|cv2.THRESH_OTSU)
# #img = savImg
# dst = cv2.Canny(img, 50, 200, None, 3)
# img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
# # Copy edges to the images that will display the results in BGR
# cdst = cv2.cvtColor(dst, cv2.COLOR_GRAY2BGR)
# cdstP = np.copy(cdst)
# lines = cv2.HoughLines(dst, 2, np.pi / 180, 150, None, 0, 0)
# if lines is not None:
# for i in range(0, len(lines)):
# rho = lines[i][0][0]
# theta = lines[i][0][1]
# a = math.cos(theta)
# b = math.sin(theta)
# x0 = a * rho
# y0 = b * rho
# pt1 = (int(x0 + 1000*(-b)), int(y0 + 1000*(a)))
# pt2 = (int(x0 - 1000*(-b)), int(y0 - 1000*(a)))
# cv2.line(img, pt1, pt2, (255,255,255), 7, cv2.LINE_AA)
# linesP = cv2.HoughLinesP(dst, 1, np.pi / 180, 50, None, 50, 10)
# if linesP is not None:
# for i in range(0, len(linesP)):
# l = linesP[i][0]
# cv2.line(cdstP, (l[0], l[1]), (l[2], l[3]), (0,0,255), 3, cv2.LINE_AA)
# cv2.imwrite('blog.png',img)
# # img = cv2.medianBlur(img,5)
# kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4,7))
# img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
# cv2.imwrite('preprocess3.png',img)
# Run the pipeline on the sample captcha in the current working directory.
img = cv2.imread('captcha_sample.png')
# cv2.imread reports failure by returning None instead of raising; fail here
# with a clear message rather than deep inside preprocess_img.
if img is None:
    raise OSError(
        "cv2.imread could not read 'captcha_sample.png' "
        "(file missing or not a decodable image)")
preprocess_img(img)