-
Notifications
You must be signed in to change notification settings - Fork 3
/
clean_data_files.py
81 lines (57 loc) · 1.75 KB
/
clean_data_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/python
#
# Clean up generated data files: purge zero-length output files and
# sequence files that are more than one week old
import sys
import os
import glob
import time
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Safety switch: when False the script only reports what it would delete;
# set to True to actually remove files.
REALMODE = False

# Directories (relative to the current working directory) that get cleaned.
# The trailing '/' is required: each name is concatenated directly with '*'
# to form a glob pattern.
vit_dir = 'vit_output/'
bw_dir = 'bw_output/'
seqdir = 'sequences/'

# Metadata file cleanup and sorting
#
# NOTE(review): metadata_name is defined but never read by this script --
# presumably reserved for a metadata cleanup pass; confirm before removing.
metadata_name = 'metadata.txt'
#  Metadata file format:  each line: (comma sep)
#
#  0) date and time stamp
#  1) name of data file
#  2) ownname  (name of the top level file)
#  3) git hash (1st 10 chars of current git hash)
#  4) number of HMM / BT states
#  5) text field (comment)
#

SECONDS_PER_DAY = 24 * 3600
MAX_SEQUENCE_AGE_DAYS = 7   # sequence files at least this old are purged


def purge_empty_files(directories, real_mode=False):
    """Report (and optionally delete) zero-length files.

    Zero-length files are left behind by crashed or ^C'ed runs.

    Args:
        directories: iterable of directory prefixes, each ending in a path
            separator (e.g. 'vit_output/').
        real_mode: when True the files are deleted; when False they are
            only reported.

    Returns:
        The number of zero-length files found (deleted or not).

    Raises:
        OSError: if a file cannot be stat'ed or removed.
    """
    count = 0
    for directory in directories:
        for path in glob.glob(directory + '*'):
            # glob also returns sub-directories (and broken symlinks);
            # os.remove() cannot delete those, so only consider real files.
            if not os.path.isfile(path):
                continue
            size = int(os.stat(path).st_size)
            if size >= 1:
                continue
            # Single-argument print() behaves identically on Python 2 and 3;
            # the doubled spaces reproduce the original comma-separated
            # print statement's output.
            print('Planning to remove:  {}  because  {} bytes'.format(path, size))
            if real_mode:
                os.remove(path)
            count += 1
    return count


def purge_old_sequences(seq_dir, real_mode=False, now=None,
                        max_age_days=MAX_SEQUENCE_AGE_DAYS):
    """Report (and optionally delete) sequence files that are too old.

    Age is measured in whole days from os.path.getctime().  NOTE: on Unix
    getctime() is the time of the last metadata change rather than the
    creation time; on Windows it is the creation time.

    Args:
        seq_dir: directory prefix ending in a path separator.
        real_mode: when True the files are deleted; when False they are
            only reported.
        now: reference time in seconds since the epoch; defaults to the
            current time.
        max_age_days: files whose age in whole days is >= this value are
            selected.

    Returns:
        The number of files selected (deleted or not).

    Raises:
        OSError: if a file cannot be inspected or removed.
    """
    if now is None:
        now = time.time()
    count = 0
    for path in glob.glob(seq_dir + '*'):
        # Skip sub-directories: os.unlink() would raise on them and abort
        # the rest of the cleanup.
        if not os.path.isfile(path):
            continue
        age_days = (now - os.path.getctime(path)) // SECONDS_PER_DAY
        if age_days < max_age_days:
            continue
        if real_mode:
            os.unlink(path)
        print('designating {} to be removed'.format(path))
        count += 1
    return count


def main():
    """Run both cleanup passes using the module-level configuration.

    Returns:
        The total number of files selected for deletion.
    """
    ndel = purge_empty_files([vit_dir, bw_dir], REALMODE)
    ndel += purge_old_sequences(seqdir, REALMODE)

    print('\n\n\n')
    if not REALMODE:
        print(' Fake mode - no actual deletions ')
    print('Deleted:  {}  files'.format(ndel))
    return ndel


if __name__ == '__main__':
    main()