Skip to content

Commit

Permalink
Merge pull request #7 from aoki-h-jp/feature/1.0.0/information-correlation
Browse files Browse the repository at this point in the history

Feature/1.0.0/information correlation
  • Loading branch information
aoki-h-jp authored Sep 3, 2023
2 parents 32ef425 + 783ca35 commit d499c05
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 28 deletions.
8 changes: 4 additions & 4 deletions crypto_features/feature/information_correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ def run_calculate(
if not os.path.exists("information_correlation"):
os.mkdir("information_correlation")

klines = klines.copy()
feature = feature.copy()

close_chg_pct_header = f"close_chg_pct_after_{return_minutes}min"
klines[close_chg_pct_header] = klines["close"].pct_change(return_minutes)
klines["close"] = klines["close"].astype(float)
klines[close_chg_pct_header] = klines["close"].pct_change(
return_minutes, fill_method="bfill"
)
klines[close_chg_pct_header] = klines[close_chg_pct_header].shift(
-return_minutes
)
Expand Down
92 changes: 68 additions & 24 deletions crypto_features/feature/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,29 +29,7 @@ def _load_klines_data(self, symbol) -> pd.DataFrame:
:return: preprocessed klines data
"""
# Load klines data
# merge all csv files
df = pd.DataFrame()
for file in os.listdir(
os.path.join(self._data_dir, self._BINANCE_KLINES_DIR, symbol, "1m")
):
df = pd.concat(
[
df,
pd.read_csv(
"/".join(
[
self._data_dir,
self._BINANCE_KLINES_DIR,
symbol,
"1m",
file,
]
)
),
]
)

df.columns = [
headers = [
"timestamp_open",
"open",
"high",
Expand All @@ -65,8 +43,74 @@ def _load_klines_data(self, symbol) -> pd.DataFrame:
"taker_buy_quote_volume",
"ignore",
]
df["timestamp_open"] = pd.to_datetime(df["timestamp_open"], utc=True, unit="ms")

raw_headers = [
"open_time",
"open",
"high",
"low",
"close",
"volume",
"close_time",
"quote_volume",
"count",
"taker_buy_volume",
"taker_buy_quote_volume",
"ignore",
]

# merge all csv files
df = pd.DataFrame(columns=headers)
for file in os.listdir(
os.path.join(self._data_dir, self._BINANCE_KLINES_DIR, symbol, "1m")
):
# header check
df_append_tmp = pd.read_csv(
"/".join(
[
self._data_dir,
self._BINANCE_KLINES_DIR,
symbol,
"1m",
file,
]
),
nrows=1,
)

if list(df_append_tmp) != raw_headers:
df_append = pd.read_csv(
"/".join(
[
self._data_dir,
self._BINANCE_KLINES_DIR,
symbol,
"1m",
file,
]
),
names=headers,
)
else:
df_append = pd.read_csv(
"/".join(
[
self._data_dir,
self._BINANCE_KLINES_DIR,
symbol,
"1m",
file,
]
),
header=None,
)
df_append = df_append.drop(0, axis=0)
df_append.columns = headers

df = pd.concat([df, df_append])

df.set_index("timestamp_open", inplace=True)
df.index = pd.to_datetime(df.index, utc=True, unit="ms")

return df

Expand Down

0 comments on commit d499c05

Please sign in to comment.