Show the code
import pandas as pd
import plotly.express as px
# Load the semicolon-separated export into a DataFrame.
csv_path = "PackageData.csv"
df = pd.read_csv(csv_path, sep=";")

# The three timestamp columns arrive as text; convert each to datetime64.
for date_col in ("first_event", "last_event", "qtr"):
    df[date_col] = pd.to_datetime(df[date_col])

# Quarter label in pandas Period notation (e.g. "2022Q1"); used later for
# grouping and as the animation frame key.
quarter_periods = df["qtr"].dt.to_period("Q")
df["qtr_label"] = quarter_periods.astype(str)
# --- Coverage check: how many rows exist for each quarter ------------------
print("Unique quarters (qtr_label) and counts:\n")
rows_per_quarter = df["qtr_label"].value_counts().sort_index()
print(rows_per_quarter, "\n")

# --- Coverage check: in how many distinct quarters does each package appear -
print("Number of distinct quarters per package (value_counts of that):\n")
quarters_per_pkg = df.groupby("package")["qtr_label"].nunique()
print(quarters_per_pkg.value_counts().sort_index(), "\n")

# --- Per-package change between successive rows ----------------------------
# Rows are ordered chronologically within each package, then differenced.
# NOTE: the difference is taken against the previous row that is *present*
# for the package, and the first row of every package (which has no
# predecessor) is filled with 0 rather than left as NaN.
df_sorted = df.sort_values(["package", "qtr"])
deltas = (
    df_sorted.groupby("package")[["total_events", "distinct_commit_authors"]]
    .diff()
    .fillna(0)
)
df_sorted["delta_events"] = deltas["total_events"]
df_sorted["delta_authors"] = deltas["distinct_commit_authors"]

print("Summary of quarter-over-quarter changes (delta_events, delta_authors):\n")
print(df_sorted.loc[:, ["delta_events", "delta_authors"]].describe(), "\n")

# --- Spot check: first package (in index order) seen in more than one quarter
multi_q_pkg = quarters_per_pkg.loc[quarters_per_pkg.gt(1)].index[:1]
print(f"Sample package with multiple quarters: {list(multi_q_pkg)}\n")
sample_columns = [
    "package",
    "qtr_label",
    "total_events",
    "distinct_commit_authors",
    "delta_events",
    "delta_authors",
]
is_sample_row = df_sorted["package"].isin(multi_q_pkg)
print(df_sorted.loc[is_sample_row, sample_columns])
# Limit the animation to the packages with the largest total_events summed
# over all quarters.
TOP_N_PACKAGES = 250
pkg_totals = df.groupby("package")["total_events"].sum().sort_values(ascending=False)
top_pkgs = pkg_totals.head(TOP_N_PACKAGES).index
df_anim = df[df["package"].isin(top_pkgs)].copy()
# Order rows by quarter first so the quarter labels are encountered
# chronologically when the frames are built.
df_anim = df_anim.sort_values(["qtr", "package"])


def _padded_range(values, pad_fraction=0.05):
    """Return ``[low, high]`` covering *values* plus a margin on each side.

    Returns ``None`` (Plotly's "autorange" default) when *values* holds no
    usable numbers. A zero-width span is widened by 1 so the range is never
    degenerate.
    """
    values = values.dropna()
    if values.empty:
        return None
    low, high = float(values.min()), float(values.max())
    span = high - low
    margin = span * pad_fraction if span else 1.0
    return [low - margin, high + margin]


# Plotly's animation docs recommend always fixing the axis ranges so the data
# stays visible throughout the animation. Pin the axes (and the colour scale)
# to the extent of *all* quarters rather than leaving them to auto-scaling.
fig = px.scatter(
    df_anim,
    x="total_events",
    y="distinct_commit_authors",
    animation_frame="qtr_label",
    animation_group="package",
    size="total_events",
    color="total_events",
    hover_data=[
        "package",
        "qtr_label",
        "total_events",
        "distinct_commit_authors",
        "git_pushes",
        "pr_opened",
        "pr_merged",
    ],
    range_x=_padded_range(df_anim["total_events"]),
    range_y=_padded_range(df_anim["distinct_commit_authors"]),
    range_color=_padded_range(df_anim["total_events"], pad_fraction=0),
    title="Fedora package activity per quarter",
)
fig.update_layout(
    xaxis_title="Total events in this quarter",
    yaxis_title="Distinct commit authors in this quarter",
    width=1400,
    height=800,
)
fig.show()
Unique quarters (qtr_label) and counts:
qtr_label
2022Q1 23726
2022Q2 14917
2022Q3 19694
2022Q4 9974
2023Q1 23763
2023Q2 13462
2023Q3 23787
2023Q4 9153
2024Q1 24365
2024Q2 15243
2024Q3 25815
2024Q4 10150
2025Q1 24974
2025Q2 12045
2025Q3 24957
2025Q4 6186
Name: count, dtype: int64
Number of distinct quarters per package (value_counts of that):
qtr_label
1 1889
2 1460
3 1377
4 1538
5 1355
6 1007
7 1066
8 2065
9 4064
10 3725
11 3006
12 2621
13 1755
14 1913
15 1573
16 1174
Name: count, dtype: int64
Summary of quarter-over-quarter changes (delta_events, delta_authors):
delta_events delta_authors
count 282211.000000 282211.000000
mean -0.131501 -0.042575
std 27.785942 1.017346
min -4431.000000 -8.000000
25% -2.000000 -1.000000
50% 0.000000 0.000000
75% 2.000000 0.000000
max 4568.000000 7.000000
Sample package with multiple quarters: ['rpms/0ad']
package qtr_label total_events distinct_commit_authors delta_events \
0 rpms/0ad 2022Q1 4 1 0.0
1 rpms/0ad 2022Q2 11 1 7.0
2 rpms/0ad 2022Q3 44 4 33.0
3 rpms/0ad 2022Q4 21 3 -23.0
4 rpms/0ad 2023Q1 3 1 -18.0
5 rpms/0ad 2023Q2 12 2 9.0
6 rpms/0ad 2023Q3 4 2 -8.0
7 rpms/0ad 2023Q4 7 2 3.0
8 rpms/0ad 2024Q1 17 5 10.0
9 rpms/0ad 2024Q2 1 0 -16.0
10 rpms/0ad 2024Q3 4 2 3.0
11 rpms/0ad 2024Q4 6 2 2.0
12 rpms/0ad 2025Q1 17 3 11.0
13 rpms/0ad 2025Q2 1 0 -16.0
14 rpms/0ad 2025Q3 18 3 17.0
15 rpms/0ad 2025Q4 6 1 -12.0
delta_authors
0 0.0
1 0.0
2 3.0
3 -1.0
4 -2.0
5 1.0
6 0.0
7 0.0
8 3.0
9 -5.0
10 2.0
11 0.0
12 1.0
13 -3.0
14 3.0
15 -2.0