import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the raw survey export, then trim leading/trailing whitespace from
# every column header.
csv_path = "flock2025_raw.csv"
df = pd.read_csv(csv_path)
df.columns = [header.strip() for header in df.columns]

# Question text shared by every role checkbox column.
_ROLE_QUESTION = "What are your current role(s) in Fedora Project?"

# Maps each raw role column ("<question> [<option>]") to a short
# "Role: ..." display label. Entries keep the order listed here.
ROLE_LABELS = {
    f"{_ROLE_QUESTION} [{option}]": f"Role: {short_name}"
    for option, short_name in (
        ("Community Ops", "Community Ops"),
        ("Design", "Design"),
        ("Documentation", "Documentation"),
        ("Globalization (Translation, Internationalization, Localization)", "Globalization (i18n/l10n)"),
        ("Infrastructure", "Infrastructure"),
        ("Mentored Projects (Mentors, Applicants, Interns, Coordinators)", "Mentored Projects"),
        ("Outreach (Ambassadors, Join SIG, DEI Team)", "Outreach & Ambassadors"),
        ("Package Maintainer", "Package Maintainer"),
        ("Quality Assurance/Testing", "Quality Assurance (QA)"),
        ("User Support (Ask Fedora, mailing lists, #fedora on fedoraproject.org Matrix, etc.)", "User Support"),
        ("Fedora Linux User", "Fedora User"),
    )
}
# Question text shared by the past-attendance checkbox columns.
_PAST_FLOCK_QUESTION = "Have you attended previous in-person Flock to Fedora events?"

# Short display labels for the past-attendance checkbox columns, followed by
# the "did not attend any" option of the social-activities question.
PAST_ATTENDANCE_LABELS = {
    f"{_PAST_FLOCK_QUESTION} [{option}]": short_name
    for option, short_name in (
        ("2025 (Prague, Czechia)", "Flock 2025 (Prague)"),
        ("2024 (Rochester, New York)", "Flock 2024 (Rochester)"),
        ("2023 (Cork, Ireland)", "Flock 2023 (Cork)"),
        ("2019 (Budapest, Hungary)", "Flock 2019 (Budapest)"),
        ("2018 (Dresden, Germany)", "Flock 2018 (Dresden)"),
        ("2017 (Cape Cod, Massachusetts, USA)", "Flock 2017 (Cape Cod)"),
        ("2016 (Krakow, Poland)", "Flock 2016 (Krakow)"),
        ("2015 (Rochester, New York, USA)", "Flock 2015 (Rochester)"),
        ("2014 (Prague, Czechia)", "Flock 2014 (Prague)"),
        ("2013 (Charleston, South Carolina, USA)", "Flock 2013 (Charleston)"),
        ("I never attended a previous in-person Flock to Fedora", "Never Attended Flock Before"),
    )
}
PAST_ATTENDANCE_LABELS[
    "If you attended the social activities at Flock 2025, please indicate if you enjoyed the event or not, and provide any feedback you would like to share with the organisers about each. [I did not attend any]"
] = "Did Not Attend Any 2025 Social Events"

# Question text shared by the social-activity checkbox columns.
_SOCIAL_QUESTION = "If you attended the social activities at Flock 2025, please indicate if you enjoyed the event or not, and provide any feedback you would like to share with the organisers about each."

# For each social event, the raw column names of its "enjoyed" and
# "did not enjoy" checkboxes.
SOCIAL_EVENT_GROUPS = {
    event: {
        "enjoyed": f"{_SOCIAL_QUESTION} [I attended and enjoyed the {activity}]",
        "not_enjoyed": f"{_SOCIAL_QUESTION} [I attended and did not enjoy the {activity}]",
    }
    for event, activity in (
        ("Walking Tour", "walking tour"),
        ("Candy Swap & Games", "candy swap and games night"),
        ("River Cruise", "social night (river cruise)"),
    )
}

# Column-name template for the session rating grid. The two spaces before
# the bracket are part of the raw column names and must be kept.
_SESSION_COLUMN = "Rate each type of session from 1 - 5, where 5 is most valuable to you and 1 is least valuable to you.  [{}]"

# Maps each raw session-rating column to a short display label.
SESSION_LABELS = {
    _SESSION_COLUMN.format(option): short_name
    for option, short_name in (
        ("Keynotes", "Keynotes"),
        ("Presentations", "Presentations"),
        ("Workshops, Bofs, Hackfests", "Workshops & Hackfests"),
        ("Hallway track", "Hallway Track"),
        ("Evening social activities", "Social Activities"),
    )
}

# Replace the verbose role and session question headers with short labels.
short_names = dict(ROLE_LABELS)
short_names.update(SESSION_LABELS)
df.rename(columns=short_names, inplace=True)

# Long-format role table: one row per (respondent, role) pair, keeping only
# the roles the respondent ticked ("Yes").
role_cols = [*ROLE_LABELS.values()]
role_long = pd.melt(
    df,
    id_vars=["Response ID"],
    value_vars=role_cols,
    var_name="Role",
    value_name="Selected",
)
role_long = role_long.loc[role_long["Selected"].eq("Yes")]

# Long-format session ratings: one row per (session type, rating), with
# missing ratings dropped.
session_cols = [*SESSION_LABELS.values()]
session_ratings_wide = df[session_cols]
session_long = pd.melt(session_ratings_wide, var_name="Session Type", value_name="Rating").dropna()


# Raw checkbox columns for each listed in-person Flock edition, newest first.
past_flock_columns = [
    f"Have you attended previous in-person Flock to Fedora events? [{edition}]"
    for edition in (
        "2025 (Prague, Czechia)",
        "2024 (Rochester, New York)",
        "2023 (Cork, Ireland)",
        "2019 (Budapest, Hungary)",
        "2018 (Dresden, Germany)",
        "2017 (Cape Cod, Massachusetts, USA)",
        "2016 (Krakow, Poland)",
        "2015 (Rochester, New York, USA)",
        "2014 (Prague, Czechia)",
        "2013 (Charleston, South Carolina, USA)",
    )
]

# Checkbox column for respondents who never attended an in-person Flock.
never_attended_col = (
    "Have you attended previous in-person Flock to Fedora events? "
    "[I never attended a previous in-person Flock to Fedora]"
)
# Per respondent, count how many of the past-Flock checkboxes are "Yes".
df["Flock Count"] = df[past_flock_columns].eq("Yes").sum(axis=1)

def classify_flock_attendance(row):
    """Return the attendance-history bucket label for one respondent row.

    A "Yes" in the never-attended column wins over any counted editions.
    Otherwise the row's "Flock Count" is matched against inclusive ranges;
    a count that fits no range (for example 0) yields "Unknown".
    """
    if row.get(never_attended_col) == "Yes":
        return "Never Attended"

    attended = row["Flock Count"]
    # (inclusive lower bound, inclusive upper bound, label)
    buckets = (
        (1, 1, "1 Flock"),
        (2, 3, "2–3 Flocks"),
        (4, 5, "4–5 Flocks"),
        (6, float("inf"), "6+ Flocks"),
    )
    for lowest, highest, label in buckets:
        if lowest <= attended <= highest:
            return label
    return "Unknown"

# Label every respondent with an attendance-history bucket by running
# classify_flock_attendance on each row (axis=1).
df["Flock Attendance Group"] = df.apply(classify_flock_attendance, axis=1)

# Bar chart: number of respondents who selected each Fedora role.
role_counts = role_long["Role"].value_counts()
roles_fig, roles_ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=role_counts.values, y=role_counts.index, ax=roles_ax)
roles_ax.set_title("Distribution of Fedora Roles at Flock 2025")
roles_ax.set_xlabel("Respondent Count")
roles_fig.tight_layout()

# Bar chart: mean rating per session type, sorted ascending.
ratings_by_type = session_long.groupby("Session Type")["Rating"]
session_means = ratings_by_type.mean().sort_values()
sessions_fig, sessions_ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=session_means.values, y=session_means.index, ax=sessions_ax)
sessions_ax.set_title("Average Rating by Session Type")
sessions_ax.set_xlabel("Mean Rating (1–5)")
sessions_fig.tight_layout()

# Build the per-role table the heatmap below reads: one row per Fedora role
# with the number of respondents who ticked that role and their mean rating
# for each session type. `session_by_role` was previously used here without
# ever being defined, which raised NameError.
session_rows = []
for role in role_cols:
    in_role = df[role] == "Yes"
    # Coerce ratings to numbers so stray non-numeric cells become NaN
    # instead of breaking the mean.
    mean_ratings = df.loc[in_role, session_cols].apply(pd.to_numeric, errors="coerce").mean()
    session_rows.append({"Role": role, "Respondents": int(in_role.sum()), **mean_ratings.to_dict()})

session_by_role = pd.DataFrame(
    session_rows, columns=["Role", "Respondents", *session_cols]
).set_index("Role")

# Only show roles with at least five respondents.
filtered = session_by_role[session_by_role["Respondents"] >= 5]
plt.figure(figsize=(10, 6))
sns.heatmap(filtered[session_cols], annot=True, cmap="YlGnBu", vmin=1, vmax=5, cbar_kws={'label': 'Average Rating'})

plt.title("Session Ratings by Fedora Role")
plt.xlabel("Session Type")
plt.ylabel("Fedora Role")
plt.tight_layout()
plt.show()

# Heatmap of social-event enjoyment by Fedora role: for each role with at
# least five respondents, the percentage of "enjoyed" answers among all
# "enjoyed" + "did not enjoy" answers for each event.
roles = list(ROLE_LABELS.values())
social_data = []

for role in roles:
    role_df = df[df[role] == "Yes"]
    if len(role_df) < 5:
        continue

    row = {"Role": role}
    for event, field_map in SOCIAL_EVENT_GROUPS.items():
        # A checkbox column that is absent from the export contributes zero.
        tallies = {
            outcome: int((role_df[field] == "Yes").sum()) if field in role_df.columns else 0
            for outcome, field in field_map.items()
        }
        attended = tallies["enjoyed"] + tallies["not_enjoyed"]
        # NaN (not None) when nobody in the role answered for this event, so
        # the column stays numeric.
        row[event] = 100 * tallies["enjoyed"] / attended if attended else np.nan
    social_data.append(row)

# Passing the column list keeps "Role" available to set_index even when no
# role passed the size filter.
social_heatmap_df = (
    pd.DataFrame(social_data, columns=["Role", *SOCIAL_EVENT_GROUPS])
    .set_index("Role")
    .sort_index()
)
social_heatmap_clean = social_heatmap_df.apply(pd.to_numeric, errors='coerce')
social_heatmap_clean = social_heatmap_clean.select_dtypes(include=['number'])

plt.figure(figsize=(10, 6))
sns.heatmap(social_heatmap_clean, annot=True, fmt=".0f", cmap="Greens", vmin=0, vmax=100, cbar_kws={'label': '% Enjoyed'})
plt.title("Social Event Enjoyment by Fedora Role")
plt.xlabel("Social Event")
plt.ylabel("Fedora Role")
plt.tight_layout()
plt.show()

# Heatmap of mean session rating per attendance-history group.
# Note: this rebinds `session_long` to a long table that also carries the
# attendance group of each rating.
session_long = df.melt(id_vars=["Flock Attendance Group"], value_vars=session_cols,
                       var_name="Session Type", value_name="Rating")

session_long = session_long.dropna(subset=["Rating"])

heatmap_df = session_long.groupby(["Flock Attendance Group", "Session Type"])["Rating"].mean().unstack()

# groupby sorts the groups alphabetically, which puts "Never Attended" after
# "6+ Flocks". Order the rows by attendance history, as the other
# attendance-group heatmaps do, and keep any other group (e.g. "Unknown")
# at the end.
group_order = ["Never Attended", "1 Flock", "2–3 Flocks", "4–5 Flocks", "6+ Flocks"]
ordered_groups = [g for g in group_order if g in heatmap_df.index]
ordered_groups += [g for g in heatmap_df.index if g not in group_order]
heatmap_df = heatmap_df.reindex(ordered_groups)

plt.figure(figsize=(10, 4))
sns.heatmap(heatmap_df, annot=True, fmt=".2f", cmap="Blues", vmin=1, vmax=5, cbar_kws={'label': 'Average Rating'})
plt.title("Session Ratings by Flock Attendance History")
plt.xlabel("Session Type")
plt.ylabel("Flock Attendance Group")
# tight_layout is called once, after the axes exist; the earlier call on the
# still-empty figure had nothing to lay out.
plt.tight_layout()
plt.show()

import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of how many respondents in each attendance-history group ticked
# each past Flock edition.

# Short axis labels for the raw past-attendance columns.
year_labels = {
    "Have you attended previous in-person Flock to Fedora events? [2025 (Prague, Czechia)]": "2025 (Prague)",
    "Have you attended previous in-person Flock to Fedora events? [2024 (Rochester, New York)]": "2024 (Rochester)",
    "Have you attended previous in-person Flock to Fedora events? [2023 (Cork, Ireland)]": "2023 (Cork)",
    "Have you attended previous in-person Flock to Fedora events? [2019 (Budapest, Hungary)]": "2019 (Budapest)",
    "Have you attended previous in-person Flock to Fedora events? [2018 (Dresden, Germany)]": "2018 (Dresden)",
    "Have you attended previous in-person Flock to Fedora events? [2017 (Cape Cod, Massachusetts, USA)]": "2017 (Cape Cod)",
    "Have you attended previous in-person Flock to Fedora events? [2016 (Krakow, Poland)]": "2016 (Krakow)",
    "Have you attended previous in-person Flock to Fedora events? [2015 (Rochester, New York, USA)]": "2015 (Rochester)",
    "Have you attended previous in-person Flock to Fedora events? [2014 (Prague, Czechia)]": "2014 (Prague)",
    "Have you attended previous in-person Flock to Fedora events? [2013 (Charleston, South Carolina, USA)]": "2013 (Charleston)",
}

# 1 where the checkbox is "Yes", else 0. The vectorised comparison replaces
# the deprecated DataFrame.applymap and works on old and new pandas alike.
attendance_binary = df[past_flock_columns].eq("Yes").astype(int)
attendance_binary["Flock Attendance Group"] = df["Flock Attendance Group"]

heatmap_df = attendance_binary.groupby("Flock Attendance Group").sum()
heatmap_df = heatmap_df.rename(columns=year_labels)
heatmap_df = heatmap_df[sorted(heatmap_df.columns, reverse=True)]  # Sort years descending

# Order rows by attendance history (groupby sorts them alphabetically),
# keeping any other group (e.g. "Unknown") at the end.
group_order = ["Never Attended", "1 Flock", "2–3 Flocks", "4–5 Flocks", "6+ Flocks"]
ordered_groups = [g for g in group_order if g in heatmap_df.index]
ordered_groups += [g for g in heatmap_df.index if g not in group_order]
heatmap_df = heatmap_df.reindex(ordered_groups)

plt.figure(figsize=(12, 10))

sns.heatmap(heatmap_df, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={"label": "Count Attended"})
plt.title("Attendance by Year and Flock Attendance Group")
plt.xlabel("Flock Year")
plt.ylabel("Flock Attendance Group")
# Apply the layout after drawing; previously it ran only on the empty figure
# and so never affected the plot.
plt.tight_layout(pad=2.0)
plt.show()

# Captured notebook output (stderr) from the cell above, kept for reference:
# /var/folders/6l/y3gf6c4x7_z05chdhcr5_3jc0000gn/T/ipykernel_25710/3776986938.py:17: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.
#   attendance_binary = df[past_flock_columns].applymap(lambda x: 1 if x == "Yes" else 0)

# Heatmap of perceived session value by Fedora role: for each role with at
# least five respondents, the percentage of its answers at each value level.
value_order = ["Not valuable at all", "Somewhat valuable", "Valuable", "Extremely valuable"]

value_col = "How valuable were the sessions at Flock to Fedora 2025 in inspiring and informing you about the future of Fedora?"

heatmap_role_data = []
for role_name in ROLE_LABELS.values():
    members = df.loc[df[role_name] == "Yes"]
    if len(members) >= 5:
        # Share of each answer among the role's non-missing answers, in percent.
        answer_pct = members[value_col].value_counts(normalize=True).mul(100)
        record = {level: answer_pct.get(level, 0) for level in value_order}
        record["Role"] = role_name
        heatmap_role_data.append(record)

hm_role_df = pd.DataFrame(heatmap_role_data).set_index("Role")
hm_role_df = hm_role_df[value_order]

value_role_fig, value_role_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    hm_role_df,
    annot=True,
    fmt=".1f",
    cmap="YlGnBu",
    vmin=0,
    vmax=100,
    cbar_kws={'label': '% of Respondents'},
    ax=value_role_ax,
)
value_role_ax.set_title("Perceived Session Value by Fedora Role")
value_role_ax.set_xlabel("Perceived Value")
value_role_ax.set_ylabel("Fedora Role")
value_role_fig.tight_layout()
plt.show()

# Heatmap of perceived session value by attendance-history group: for each
# group with at least five respondents, the percentage of its answers at
# each value level.
heatmap_flock_data = []

for group in df["Flock Attendance Group"].dropna().unique():
    group_df = df[df["Flock Attendance Group"] == group]
    if len(group_df) < 5:
        continue
    counts = group_df[value_col].value_counts(normalize=True) * 100
    row = {k: counts.get(k, 0) for k in value_order}
    row["Attendance Group"] = group
    heatmap_flock_data.append(row)

hm_flock_df = pd.DataFrame(heatmap_flock_data).set_index("Attendance Group")[value_order]
attendance_order = ["Never Attended", "1 Flock", "2–3 Flocks", "4–5 Flocks", "6+ Flocks"]
# Order only the groups that passed the size filter. Reindexing by the full
# list would bring the skipped groups back as all-NaN (blank) rows.
hm_flock_df = hm_flock_df.reindex(
    [label for label in attendance_order if label in hm_flock_df.index]
)
plt.figure(figsize=(8, 5))
sns.heatmap(hm_flock_df, annot=True, fmt=".1f", cmap="YlGnBu", vmin=0, vmax=100, cbar_kws={'label': '% of Respondents'})
plt.title("Perceived Session Value by Flock Attendance Group")
plt.xlabel("Perceived Value")
plt.ylabel("Attendance Group")
plt.tight_layout()
plt.show()

# Heatmap of hallway-track importance by Fedora role: for each role with at
# least five respondents, the percentage of its answers at each of the
# listed importance levels.
hallway_question = "How important is the “hallway track” experience to you?"
hallway_order = [
    "This is the most important part of Flock to Fedora for me",
    "Important",
    "Somewhat important",
]

role_data = []
for role_name in ROLE_LABELS.values():
    members = df.loc[df[role_name] == "Yes"]
    if len(members) >= 5:
        # Share of each answer among the role's non-missing answers, in percent.
        answer_pct = members[hallway_question].value_counts(normalize=True).mul(100)
        record = {level: answer_pct.get(level, 0) for level in hallway_order}
        record["Role"] = role_name
        role_data.append(record)

hallway_role_df = pd.DataFrame(role_data).set_index("Role")
hallway_role_df = hallway_role_df[hallway_order]

hallway_role_fig, hallway_role_ax = plt.subplots(figsize=(13, 6))
sns.heatmap(
    hallway_role_df,
    annot=True,
    fmt=".1f",
    cmap="Purples",
    vmin=0,
    vmax=100,
    cbar_kws={"label": "% of Respondents"},
    ax=hallway_role_ax,
)
hallway_role_ax.set_title("Importance of Hallway Track by Fedora Role")
hallway_role_ax.set_xlabel("Importance Level")
hallway_role_ax.set_ylabel("Fedora Role")
hallway_role_fig.tight_layout(pad=2.0)
plt.show()

import textwrap

# Heatmap of hallway-track importance by attendance-history group: for each
# group with at least five respondents, the percentage of its answers at
# each of the listed importance levels.
flock_data = []

for group in df["Flock Attendance Group"].dropna().unique():
    group_df = df[df["Flock Attendance Group"] == group]
    if len(group_df) < 5:
        continue

    counts = group_df[hallway_question].value_counts(normalize=True) * 100
    row = {k: counts.get(k, 0) for k in hallway_order}
    row["Attendance Group"] = group
    flock_data.append(row)

hallway_flock_df = pd.DataFrame(flock_data).set_index("Attendance Group")[hallway_order]

# Order only the groups that passed the size filter. Reindexing by the full
# list would bring the skipped groups back as all-NaN (blank) rows.
attendance_order = ["Never Attended", "1 Flock", "2–3 Flocks", "4–5 Flocks", "6+ Flocks"]
hallway_flock_df = hallway_flock_df.reindex(
    [label for label in attendance_order if label in hallway_flock_df.index]
)

# Wrap the long answer texts onto several lines. Left on one line, the
# longest label did not fit and matplotlib reported "Tight layout not
# applied" for this figure.
wrapped_levels = [textwrap.fill(level, width=24) for level in hallway_order]

plt.figure(figsize=(10, 5))
sns.heatmap(hallway_flock_df, annot=True, fmt=".1f", cmap="Oranges", vmin=0, vmax=100,
            cbar_kws={"label": "% of Respondents"}, xticklabels=wrapped_levels)
plt.title("Importance of Hallway Track by Flock Attendance Group")
plt.xlabel("Importance Level")
plt.ylabel("Attendance Group")
plt.tight_layout(pad=2.0)
plt.show()

# Captured notebook output (stderr) from the cell above, kept for reference:
# /var/folders/6l/y3gf6c4x7_z05chdhcr5_3jc0000gn/T/ipykernel_25710/243869571.py:52: UserWarning: Tight layout not applied. The bottom and top margins cannot be made large enough to accommodate all Axes decorations.
#   plt.tight_layout(pad=2.0)