-
Notifications
You must be signed in to change notification settings - Fork 929
/
from_wikipedia.py
101 lines (93 loc) · 3.04 KB
/
from_wikipedia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
from typing import List, Optional, Tuple

from trdg.generators.from_strings import GeneratorFromStrings
from trdg.data_generator import FakeTextDataGenerator
from trdg.string_generator import create_strings_from_wikipedia
from trdg.utils import load_dict, load_fonts
class GeneratorFromWikipedia:
    """Generator that uses sentences taken from random Wikipedia articles.

    The class wraps a ``GeneratorFromStrings`` instance. Sentences are
    obtained in batches through ``create_strings_from_wikipedia`` and the
    wrapped generator's ``strings`` attribute is replaced with a new batch
    every ``batch_size`` generated samples.

    Instances are their own iterators: ``__next__`` raises ``StopIteration``
    once ``count`` samples have been produced through the iterator protocol.
    With the default ``count=-1`` the local counter never equals ``count``,
    so this class does not stop the iteration by itself.
    """

    def __init__(
        self,
        count: int = -1,
        minimum_length: int = 1,
        # ``None`` (or an empty list) means "use load_fonts(language)".
        fonts: Optional[List[str]] = None,
        language: str = "en",
        size: int = 32,
        skewing_angle: int = 0,
        random_skew: bool = False,
        blur: int = 0,
        random_blur: bool = False,
        background_type: int = 0,
        distorsion_type: int = 0,
        distorsion_orientation: int = 0,
        is_handwritten: bool = False,
        width: int = -1,
        alignment: int = 1,
        text_color: str = "#282828",
        orientation: int = 0,
        space_width: float = 1.0,
        character_spacing: int = 0,
        margins: Tuple[int, int, int, int] = (5, 5, 5, 5),
        fit: bool = False,
        output_mask: bool = False,
        word_split: bool = False,
        # NOTE(review): os.path.join discards every component that precedes
        # an absolute one, so the leading ".." has no effect here and the
        # default resolves to "<directory of this file>/images". Confirm
        # this is the intended location before changing it.
        image_dir: str = os.path.join(
            "..", os.path.split(os.path.realpath(__file__))[0], "images"
        ),
        stroke_width: int = 0,
        stroke_fill: str = "#282828",
        image_mode: str = "RGB",
        output_bboxes: int = 0,
    ):
        """Fetch a first batch of sentences and build the wrapped generator.

        Args:
            count: Number of samples after which ``__next__`` stops; also
                forwarded to ``GeneratorFromStrings``.
            minimum_length: Passed to ``create_strings_from_wikipedia``
                together with the batch size and the language.
            fonts: Fonts forwarded to ``GeneratorFromStrings``. When it is
                ``None`` or empty, ``load_fonts(language)`` is used instead.
            language: Language handed to ``create_strings_from_wikipedia``,
                ``load_fonts`` and ``GeneratorFromStrings``.

        All remaining parameters are forwarded positionally, unchanged and
        in declaration order, to ``GeneratorFromStrings``.
        """
        self.generated_count = 0
        self.count = count
        self.minimum_length = minimum_length
        self.language = language

        # Batch size is ``count`` clamped to the range [1, 1000].
        # NOTE(review): with count=-1 this evaluates to 1, i.e. a new batch
        # is requested for every generated sample - confirm this is wanted.
        self.batch_size = min(max(count, 1), 1000)
        # Value of the wrapped generator's counter at which the next batch
        # of sentences is fetched.
        self.steps_until_regeneration = self.batch_size

        self.generator = GeneratorFromStrings(
            self._fetch_strings(),
            count,
            fonts if fonts else load_fonts(language),
            language,
            size,
            skewing_angle,
            random_skew,
            blur,
            random_blur,
            background_type,
            distorsion_type,
            distorsion_orientation,
            is_handwritten,
            width,
            alignment,
            text_color,
            orientation,
            space_width,
            character_spacing,
            margins,
            fit,
            output_mask,
            word_split,
            image_dir,
            stroke_width,
            stroke_fill,
            image_mode,
            output_bboxes,
        )

    def _fetch_strings(self):
        """Request one batch of sentences for the configured settings."""
        return create_strings_from_wikipedia(
            self.minimum_length, self.batch_size, self.language
        )

    def __iter__(self) -> "GeneratorFromWikipedia":
        """Return the instance itself, which implements ``__next__``."""
        return self

    def __next__(self):
        """Return the next sample, stopping after ``count`` samples.

        Raises:
            StopIteration: When ``generated_count`` equals ``count``.
        """
        if self.generated_count == self.count:
            raise StopIteration
        self.generated_count += 1
        return self.next()

    def next(self):
        """Return the wrapped generator's next sample.

        Before delegating, the sentence list of the wrapped generator is
        replaced with a new batch whenever its ``generated_count`` has
        reached ``steps_until_regeneration``. Calling this method directly
        does not update ``self.generated_count``; only ``__next__`` does.
        """
        if self.generator.generated_count >= self.steps_until_regeneration:
            self.generator.strings = self._fetch_strings()
            self.steps_until_regeneration += self.batch_size
        return self.generator.next()