---
title: Bugs Edited by Affiliation
description: This analysis visualizes which communities are editing bugs in Bugzilla, based on whether the editor uses a redhat.com email address or a non-redhat.com one.
date: 2025-10-12
---
{'title': 'Bugs Edited by Affiliation', 'refresh': 'weekly', 'onIndex': 0}
This analysis visualizes which communities are editing bugs in Bugzilla, based on whether the editor uses a redhat.com email address or a non-redhat.com one. This may not be accurate for some community members who work at Red Hat and use their Red Hat email, but contribute outside of their role. It may also be incorrect for "shadow" Red Hat users who use a personal email for community work. At best, this is an estimate.
Show the code
# common fedora commops analytics includesimport pyarrow.dataset as dsimport pyarrow.parquet as pqimport pandas as pdimport pyarrow as paimport matplotlib.pyplot as pltimport seaborn as snsimport jsonfrom datetime import datetimefrom collections import defaultdictimport osfrom pyarrow import fsimport pyarrow.dataset as dsfrom pathlib import Path
Show the code
# @replace DATA_SOURCES
DATA_SOURCES = {"datagrepper-topics": "/home/jovyan/work/bus2parquet/output_parquets"}
parquet_dir = DATA_SOURCES["datagrepper-topics"]
topic = "org.fedoraproject.prod.bugzilla.bug.update"

# First day of the month twelve months back.
# NOTE: the date cutoff is not applied below -- every non-empty file for the
# topic is loaded regardless of the date embedded in its name.
cutoff_date = (pd.Timestamp.now().replace(day=1) - pd.DateOffset(months=12)).date()

# Collect every non-empty parquet file for the topic. Sorting makes the load
# order (and therefore the row order of the final frame) reproducible.
files = sorted(
    str(p)
    for p in Path(f"{parquet_dir}/{topic}").glob("fedora-*.parquet")
    if os.path.getsize(p) > 0
)

local_fs = fs.LocalFileSystem()

# Pass 1 (disk): read each file exactly once and cast every column to string,
# so files whose columns disagree on type can still be combined. A file that
# cannot be read or cast is reported once and skipped (best effort).
string_tables = []  # (row count, {column name: string column}) per loaded file
all_fields = {}
for f in files:
    try:
        tbl = pq.read_table(f)
        casted = {name: tbl[name].cast(pa.string()) for name in tbl.schema.names}
    except Exception as e:
        print(f"[WARN] Skipping {f}: {e}")
        continue
    for name in casted:
        all_fields[name] = pa.string()  # force everything to string
    string_tables.append((len(tbl), casted))

# Unified schema: the union of all column names seen, alphabetically ordered,
# every column typed as string.
unified_schema = pa.schema([pa.field(name, pa.string()) for name in sorted(all_fields)])

# Pass 2 (memory only): lay every table out against the unified schema,
# filling the columns a file did not have with nulls.
tables = []
for num_rows, casted in string_tables:
    columns = [
        casted[name] if name in casted else pa.nulls(num_rows, type=pa.string())
        for name in unified_schema.names
    ]
    tables.append(pa.table(columns, schema=unified_schema))

if tables:
    # All tables already share unified_schema, so no schema promotion is
    # needed; this avoids the deprecated ``promote=True`` argument.
    table = pa.concat_tables(tables)
    df = table.to_pandas()
    print(f"Loaded {len(df)} bug update records from {len(tables)} tables.")
else:
    print("No valid parquet files found")
/opt/conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3526: FutureWarning: promote has been superseded by promote_options='default'.
exec(code_obj, self.user_global_ns, self.user_ns)
import matplotlib.pyplot as plt


def plot(df, title):
    """Show *df* as a stacked bar chart, with rows ordered by index.

    Args:
        df: Data to chart (presumably a pandas DataFrame with "Red Hat"
            and "Community" columns -- confirm against the caller). The
            "Red Hat" series is drawn in red and "Community" in blue.
        title: Text placed above the chart.
    """
    affiliation_colors = {"Red Hat": "red", "Community": "blue"}
    ordered = df.sort_index()
    ax = ordered.plot(
        kind="bar",
        color=affiliation_colors,
        stacked=True,
        figsize=(12, 6),
    )
    ax.set_title(title)
    ax.set_ylabel("Number of Bugs")
    ax.set_xlabel("Month or Priority")
    # Slant the tick labels so long index labels do not overlap.
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
Show the code
# One chart per bug activity; each compares Red Hat with Community.
plot(df=bugs_opened, title="Bugs Opened: Red Hat vs Community")
plot(df=bugs_closed, title="Bugs Closed: Red Hat vs Community")
plot(df=bugs_edited, title="Bugs Edited: Red Hat vs Community")