# Filter rows with one boolean expression; the query string combines a
# comparison on `a` with a chained comparison on `b`.
# NOTE(review): `df` is not defined above this line in the visible source —
# presumably this snippet stands on its own; confirm before running top to bottom.
df_filtered = df.query('a > 0 and 0 < b < 2')
# Example frame: 30 rows of np.random.randn values in columns a, b and c.
df = pd.DataFrame(np.random.randn(30, 3), columns=['a','b','c'])
# Chained form: the second query() filters the result of the first.
df_filtered = df.query('a > 0').query('0 < b < 2')
# Single-call form that combines both conditions with `and`.
df_filtered = df.query('a > 0 and 0 < b < 2')
# If you need to refer to Python variables in your query, the documentation says: "You can refer to variables in the environment by prefixing them with an '@' character like @a + b". Note that the following are both valid: df.query('a in list([1,2])') and s = set([1,2]); df.query('a in @s').
# – teichert
# Drop the rows whose "column_name" equals 5, then group the remaining rows
# by "other_column_name".
df.loc[df["column_name"].ne(5)].groupby("other_column_name")
import pandas as pd
import numpy as np
# Seed NumPy's global random state so the sample frame below is reproducible.
np.random.seed([3, 1415])

# 10 rows x 5 columns of random integers drawn from [0, 10), labelled A-E.
df = pd.DataFrame(
    data=np.random.randint(0, 10, size=(10, 5)),
    columns=['A', 'B', 'C', 'D', 'E'],
)
df
A B C D E
0 0 2 7 3 8
1 7 0 6 8 6
2 0 2 0 4 9
3 7 3 2 4 3
4 3 6 7 7 4
5 5 3 7 5 9
6 8 7 6 4 7
7 6 2 6 6 5
8 2 8 7 5 8
9 4 7 6 1 5
# Keep only the rows where column D is greater than column B.
df.query('D > B')
A B C D E
0 0 2 7 3 8
1 7 0 6 8 6
2 0 2 0 4 9
3 7 3 2 4 3
4 3 6 7 7 4
5 5 3 7 5 9
7 6 2 6 6 5
# Chain two query() calls: keep the rows where D > B, then, of those,
# keep the rows where C > B.
df.query('D > B').query('C > B')
# This is equivalent to the single call
# df.query('D > B and C > B')
# but writing it that way would defeat the purpose of demonstrating chaining.
A B C D E
0 0 2 7 3 8
1 7 0 6 8 6
4 3 6 7 7 4
5 5 3 7 5 9
7 6 2 6 6 5
# Keep only the rows whose 'column' equals `value`. `.loc` accepts a callable
# that receives the frame and returns a boolean mask, so the filter can sit
# in the middle of a method chain without naming the intermediate frame.
df_filtered = df.loc[lambda x: x['column'] == value]
# Same filter via pipe(). pipe() returns whatever the callable returns, so the
# callable must index the frame itself; returning only the comparison would
# yield the boolean mask instead of the filtered rows.
df_filtered = df.pipe(lambda x: x[x['column'] == value])
# Distinct values of the "id" column; np.unique flattens the one-column
# selection and returns the unique entries in sorted order.
uniquevalues = np.unique(df.loc[:, ['id']].values)