In [1]: df = pd.DataFrame( {'a':['A','A','B','B','B','C'], 'b':[1,2,5,5,4,6]})
df
Out[1]:
a b
0 A 1
1 A 2
2 B 5
3 B 5
4 B 4
5 C 6
In [2]: df.groupby('a')['b'].apply(list)
Out[2]:
a
A [1, 2]
B [5, 5, 4]
C [6]
Name: b, dtype: object
In [3]: df1 = df.groupby('a')['b'].apply(list).reset_index(name='new')
df1
Out[3]:
a new
0 A [1, 2]
1 B [5, 5, 4]
2 C [6]
# usage example
gb = df.groupby(["col1", "col2"])
counts = gb.size().to_frame(name="counts")
count
(
counts.join(gb.agg({"col3": "mean"}).rename(columns={"col3": "col3_mean"}))
.join(gb.agg({"col4": "median"}).rename(columns={"col4": "col4_median"}))
.join(gb.agg({"col4": "min"}).rename(columns={"col4": "col4_min"}))
.reset_index()
)
# to create dataframe
keys = np.array(
[
["A", "B"],
["A", "B"],
["A", "B"],
["A", "B"],
["C", "D"],
["C", "D"],
["C", "D"],
["E", "F"],
["E", "F"],
["G", "H"],
]
)
df = pd.DataFrame(
np.hstack([keys, np.random.randn(10, 4).round(2)]), columns=["col1", "col2", "col3", "col4", "col5", "col6"]
)
df[["col3", "col4", "col5", "col6"]] = df[["col3", "col4", "col5", "col6"]].astype(float)