Skip to content

Commit

Permalink
field name is different from category
Browse files Browse the repository at this point in the history
  • Loading branch information
aliciaaevans committed Jul 31, 2024
1 parent e2dfd98 commit 9973244
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 15 deletions.
26 changes: 13 additions & 13 deletions src/build_plots/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
logger = getLogger(__name__)


def buildDailyPlot(category, max_packages, days_to_plot):
def buildDailyPlot(category, field, max_packages, days_to_plot):
if not os.path.exists("plots"):
os.makedirs("plots")

Expand All @@ -32,11 +32,11 @@ def buildDailyPlot(category, max_packages, days_to_plot):

df = pd.read_csv(
f"bioconda-stats/package-downloads/anaconda.org/bioconda/{category}/{filename}",
dtype={ category: str, "total": int },
dtype={ field: str, "total": int },
encoding="utf-8",
sep="\t",
)
versions = set(df[category])
versions = set(df[field])
prev_tagname = tags[len(tags) - 1].name

# Get tags going back 15 days (or as specified in arg)
Expand All @@ -59,17 +59,17 @@ def buildDailyPlot(category, max_packages, days_to_plot):
logger.debug(f"Found data for {package} from date {tagref.name}.")
new_df = pd.read_csv(
io.BytesIO(blob.data_stream.read()),
dtype={ category: str, "total": int },
dtype={ field: str, "total": int },
encoding="utf-8",
sep="\t"
)
# Compute the delta between the totals recorded on two different dates
versions = versions | set(new_df[category])
df_sub = df.set_index(category).subtract(
new_df.set_index(category), fill_value=0
versions = versions | set(new_df[field])
df_sub = df.set_index(field).subtract(
new_df.set_index(field), fill_value=0
)
df_sub.rename(columns={"total": "delta"}, inplace=True)
df = df.merge(df_sub, on=category)
df = df.merge(df_sub, on=field)
df["date"] = prev_tagname
package_df = pd.concat([package_df, df], ignore_index=True)
df = new_df
Expand All @@ -79,12 +79,12 @@ def buildDailyPlot(category, max_packages, days_to_plot):
# Get 7 most recent versions, sorting by VersionOrder
if category == "versions":
version_list = sorted(version_list, key=VersionOrder)[-7:]
package_df[category] = pd.Categorical(
package_df[category], ordered=True, categories=version_list
package_df[field] = pd.Categorical(
package_df[field], ordered=True, categories=version_list
)
package_df = package_df[package_df[category].notna()].sort_values(
by=[category, "date"]
)[["date", "total", "delta", category]]
package_df = package_df[package_df[field].notna()].sort_values(
by=[field, "date"]
)[["date", "total", "delta", field]]

# Save plot data
if not os.path.exists(f"plots/{package}"):
Expand Down
2 changes: 1 addition & 1 deletion src/build_plots/plot_platforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
if len(sys.argv) > 2 and sys.argv[2]:
max_packages = int(sys.argv[2])

buildDailyPlot("platforms", max_packages, days_to_plot)
buildDailyPlot("platforms", "platform", max_packages, days_to_plot)
2 changes: 1 addition & 1 deletion src/build_plots/plot_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
if len(sys.argv) > 2 and sys.argv[2]:
max_packages = int(sys.argv[2])

buildDailyPlot("versions", max_packages, days_to_plot)
buildDailyPlot("versions", "version", max_packages, days_to_plot)

0 comments on commit 9973244

Please sign in to comment.