-
Notifications
You must be signed in to change notification settings - Fork 10
/
make_source
executable file
·348 lines (289 loc) · 8.91 KB
/
make_source
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
#!/usr/bin/env perl
use strict;
use warnings;
use File::Find;
use File::Path;
use File::Basename;
use File::Copy;
use File::Copy::Recursive qw(fcopy rcopy dircopy fmove rmove dirmove);
use Cwd qw (cwd getcwd abs_path);
# Names of the supported source types, as given on the command line.
my $CUSTOM_FILE_TYPE          = 'custom-file';
my $ITEMS_XML_FILE_TYPE       = 'intermine-items-xml-file';
my $LARGE_ITEMS_XML_FILE_TYPE = 'intermine-items-large-xml-file';
my $FASTA_FILE_TYPE           = 'fasta';
my $GFF_FILE_TYPE             = 'gff';
my $DB_TYPE                   = 'db';
my $OBO_TYPE                  = 'obo';

# Forward declarations of two of the property generators referenced below.
sub custom_file_props;
sub xml_items_props;

# Configuration for each source type:
#   project_props_gen - code ref returning the text that is substituted for
#                       ${project-properties} in the skeleton files
#   create_main       - true for sources that get a src/main directory,
#                       false for resources-only sources
#   base_class        - base class name, used to pick the matching
#                       *_template skeleton files (only when create_main)
my %type_properties = (
    $CUSTOM_FILE_TYPE => {
        project_props_gen => \&custom_file_props,
        create_main       => 1,
        base_class        => 'BioFileConverter',
    },
    $ITEMS_XML_FILE_TYPE => {
        project_props_gen => \&xml_items_props,
        create_main       => 0,
    },
    $LARGE_ITEMS_XML_FILE_TYPE => {
        project_props_gen => \&large_xml_items_props,
        create_main       => 0,
    },
    $GFF_FILE_TYPE => {
        project_props_gen => \&gff_props,
        create_main       => 1,
        base_class        => 'GFF3RecordHandler',
    },
    $FASTA_FILE_TYPE => {
        project_props_gen => \&fasta_props,
        create_main       => 1,
        base_class        => 'FastaConverter',
    },
    $DB_TYPE => {
        project_props_gen => \&db_props,
        create_main       => 1,
        base_class        => 'BioDBConverter',
    },
    $OBO_TYPE => {
        project_props_gen => \&obo_props,
        create_main       => 0,
    },
);

# Sorted list of type names, one per line, indented for the usage message.
my $source_types = do {
    my $pad = ' ' x 20;
    join("\n", map { $pad . $_ } sort keys %type_properties);
};
# Validate the number of command line arguments before they are consumed.
# The obo source type needs five arguments; every other type needs two.
# The type is compared with 'eq' rather than matched as a regex so that only
# the exact type name selects the obo rules (a regex match would also accept
# any type name that merely contains "obo").
if (@ARGV >= 2 && $ARGV[1] eq $OBO_TYPE) {
    if (@ARGV != 5) {
        obo_usage("Error: the obo source type needs five arguments.\n");
    }
}
elsif (@ARGV != 2) {
    usage("Error: needs two arguments.\n");
}
# Convert a dash/underscore separated name to CamelCase,
# eg. "foobase-gene-data" becomes "FoobaseGeneData".
# The rest of each segment may be empty ([^\-_]*), so single-letter segments
# such as the "a" and "b" in "a-b" are capitalised and joined too.
sub to_camel_case
{
    my ($name) = @_;
    (my $camel = $name) =~ s/(\w)([^\-_]*)[_\-]?/\U$1\E$2/g;
    return $camel;
}

# positional command line arguments
my $source_name = shift;
my $source_type = shift;

# only used for obo source_type
my $ontology_name = shift;
my $ontology_url  = shift;
my $term_class    = shift;

# used to build the Java class names written into the generated files
my $camel_case_source_name = to_camel_case($source_name);
# Project properties text for the custom-file source type: enables the custom
# file target and names the converter class derived from the source name.
sub custom_file_props
{
    my $converter_class =
        "org.intermine.bio.dataconversion.${camel_case_source_name}Converter";
    return "have.file.custom.tgt = true\n"
         . "converter.class = $converter_class\n";
}
# Project properties text for the fasta source type: enables the direct custom
# file flag and names the FASTA loader class derived from the source name.
sub fasta_props
{
    my @property_lines = (
        'have.file.custom.direct = true',
        "loader.class = org.intermine.bio.dataconversion.${camel_case_source_name}FastaConverter",
    );
    return join("\n", @property_lines) . "\n";
}
# Project properties text for the gff source type: enables the GFF3 flag and
# names the record handler class derived from the source name.
sub gff_props
{
    my $handler_class =
        "org.intermine.bio.dataconversion.${camel_case_source_name}GFF3RecordHandler";
    my $props = "have.file.gff3 = true\n";
    $props .= "gff3.handlerClassName = $handler_class\n";
    return $props;
}
# Project properties text for the db source type: enables the database target
# and names the converter class derived from the source name.
sub db_props
{
    my @property_lines = (
        'have.db.tgt = true',
        "converter.class = org.intermine.bio.dataconversion.${camel_case_source_name}Converter",
    );
    return join("\n", @property_lines) . "\n";
}
# Project properties text for the intermine-items-xml-file source type.
sub xml_items_props
{
    return "have.file.xml.tgt = true\n";
}
# Project properties text for the intermine-items-large-xml-file source type.
sub large_xml_items_props
{
    return "have.large.file.xml.tgt = true\n";
}
# Project properties text for the obo source type. The ontology name, URL and
# term class come from the extra command line arguments; the "#" lines are
# comments written into the generated properties.
sub obo_props
{
    my @property_lines = (
        'have.file.obo = true',
        '# specify the name of the ontology',
        "obo.ontology.name = ${ontology_name}",
        '# set a URL at which more information on the ontology can be found',
        "obo.ontology.url = ${ontology_url}",
        '# set the class name for terms that will be created, you may need to define',
        "# this class in a ${source_name}_additions.xml file for this source",
        "obo.term.class = ${term_class}",
    );
    return join("\n", @property_lines) . "\n";
}
# Die with the given message followed by the general usage text, which lists
# every known source type. Never returns.
sub usage
{
    my ($message) = @_;

    my $help_text = <<"MESSAGE";
$0: $message
usage:
$0 <source-name> <source-type>
<source-name> - the name of the directory to create in current directory
eg. foobase-gene-data
<source-type> - possible source types are:
$source_types
MESSAGE

    die $help_text;
}
# Die with the given message followed by the usage text for the obo source
# type, which takes three extra arguments. Never returns.
sub obo_usage
{
    my ($message) = @_;

    my $help_text = <<"MESSAGE";
$0: $message
For the obo source_type you need to specify additional parameters:
usage:
$0 <source-name> <source-type> <ontology-name> <ontology-url> <term-class>
<source-name> - the name of the directory to create in this directory
eg. foobase-gene-data
<source-type> - the source type is 'obo'
<ontology-name> - the name of the ontology
e.g. 'Gene Ontology'
<ontology-url> - a url to find more information about the ontology
e.g. 'http://www.geneontology.org'
<term-class> - the name of the class used to represent each ontology term
e.g. GOTerm
MESSAGE

    die $help_text;
}
# get the directory where this script is
my $script_directory = dirname(abs_path($0));

# Validate the request BEFORE touching the file system, so that an unknown
# source type or an already existing source directory does not leave a copied
# gradle skeleton or a modified settings.gradle behind.
my %type_config;
if (exists $type_properties{$source_type}) {
    %type_config = %{ $type_properties{$source_type} };
}
else {
    usage("unknown source type: $source_type");
}
{
    my $planned_source_dir = getcwd() . "/$source_name";
    if (-e $planned_source_dir) {
        usage("$planned_source_dir already exists\n");
    }
}

# check to see if there is already a gradle project created.
# if YES, create a simple source
# if NO, create a gradle project
my $settings_file  = 'settings.gradle';
my $create_project = (-e $settings_file) ? 0 : 1;

# no settings.gradle yet: copy the gradle skeleton files into the current
# directory
if ($create_project) {
    my $source_dir = "$script_directory/skeletons/data-sources/gradle";
    dircopy($source_dir, cwd()) or die "Could not copy '$source_dir': $!\n";
}

# add this data source to our settings file
# (print is used because the 'say' feature is not enabled in this file)
open(my $fh, '>>', $settings_file) or die "Could not open file '$settings_file' $!";
print {$fh} "include ':bio-source-$source_name'\n";
print {$fh} "project(':bio-source-$source_name').projectDir = new File(settingsDir, './$source_name')\n";
# buffered write errors only surface at close, so check it
close $fh or die "Could not close file '$settings_file' $!";
# There are two kinds of sources: those that get a src/main directory and
# those that only get a resources directory. Each kind has its own skeleton.
my $create_main = $type_config{create_main};
my $base_class  = $type_config{base_class};

# skeletons live next to this script; fasta sources have a dedicated one
my $STRUCTURE_DIR = do {
    my $skeleton =
          !$create_main                   ? 'resources-only-source'
        : $base_class eq 'FastaConverter' ? 'custom-fasta-source'
        :                                   'custom-source';
    "$script_directory/skeletons/data-sources/$skeleton";
};

# map of patterns to substitute; for each key we look for ${key} and
# substitute the value from the map. The project properties text comes from
# the generator registered for the chosen source type.
my %substitutions = (
    'source-name'            => $source_name,
    'camel-case-source-name' => $camel_case_source_name,
    'project-properties'     => scalar $type_config{project_props_gen}->(),
);
# Take a string and replace every occurrence of ${key}, for each key in the
# %substitutions map, with the corresponding value from the map.
# eg. the string "${source-name}" will change to $source_name
#
# Returns the substituted copy; the argument itself is not modified.
sub substitute
{
    my ($text) = @_;

    # sorted so the result does not depend on hash ordering
    for my $subs_key (sort keys %substitutions) {
        my $value = $substitutions{$subs_key};

        # \Q..\E: the key is literal text, not a pattern.
        # /g: a string may mention the same placeholder more than once.
        $text =~ s/\$\{\Q$subs_key\E\}/$value/g;
    }

    return $text;
}
# remember the directory the script was started in; the new source directory
# is created beneath it
my $start_dir = getcwd();
# True when the given path is "main" itself or something below "main/".
sub starts_with_main
{
    my ($candidate) = @_;
    return $candidate =~ m{^main($|/)};
}
# True when the given path is "test" itself or something below "test/".
sub starts_with_test
{
    my ($candidate) = @_;
    return $candidate =~ m{^test($|/)};
}
# the new source is created in a directory named after it, below the start
# directory; refuse to continue if something with that name already exists
my $full_source_dir = "$start_dir/$source_name";
usage("$full_source_dir already exists\n") if -e $full_source_dir;
# This function is called by File::Find once for each file and directory in
# the skeleton source directory. It substitutes patterns like ${FOO} in file
# contents and in file names, then writes the result to the new source
# directory.
#
# Skipped entries:
#   - for sources with a main directory: *_template files that do not belong
#     to the base class of the chosen source type (the matching ones are
#     written without the ".<BaseClass>_template" suffix)
#   - for resources-only sources: everything under main/ and test/
sub process
{
    my $orig_path = $File::Find::name;

    # Path relative to the skeleton root. The root is literal text, so it is
    # quoted with \Q..\E and anchored at the start of the path.
    (my $dest_path = $orig_path) =~ s{^\Q$STRUCTURE_DIR\E/?}{};

    if ($create_main) {
        if ($orig_path =~ /_template$/) {
            # quotemeta so the leading "." only matches a literal dot
            my $template_suffix =
                quotemeta(".$type_config{base_class}_template");
            return if $orig_path !~ /$template_suffix$/;
            $dest_path =~ s/$template_suffix$//;
        }
    }
    elsif (starts_with_main($dest_path) || starts_with_test($dest_path)) {
        return;
    }

    # file and directory names may contain placeholders too
    $dest_path = $full_source_dir . '/' . substitute($dest_path);

    if (-d $orig_path) {
        mkpath($dest_path);
        return;
    }

    # copy the file line by line, substituting placeholders on the way
    open my $orig, '<', $orig_path or die "can't open $orig_path: $!\n";
    open my $dest, '>', $dest_path or die "can't open $dest_path: $!\n";
    while (my $line = <$orig>) {
        print {$dest} substitute($line);
    }
    close $orig or die "can't close $orig_path: $!\n";
    close $dest or die "can't close $dest_path: $!\n";
}
# File::Find preprocess hook: returns the given directory entries without
# subversion directories (.svn) and backup files (names ending in "~").
sub preprocess
{
    my @kept;
    for my $entry (@_) {
        next if $entry =~ /^(\.svn|.*~)$/;
        push @kept, $entry;
    }
    return @kept;
}
# walk the skeleton directory, calling process() on every file and directory
# that preprocess() lets through
find(
    {
        preprocess => \&preprocess,
        wanted     => \&process,
    },
    $STRUCTURE_DIR
);

warn "created $full_source_dir directory for $source_name\n";

# obo sources may need their term class adding to the data model by hand
warn "\n You may need to add $term_class to the data model in $full_source_dir/${source_name}_additions.xml\n\n"
    if $source_type =~ $OBO_TYPE;