Create a dataframe from a dict
mydict = {'Name': 'Jax', 'Age': 'Teller', 'City': 'Antioch'}
df = pd.DataFrame(mydict, index=[0]) # one row (index label 0); dict keys become the columns
df2 = pd.DataFrame.from_dict(mydict, orient='index') # dict keys become the row index; values go in a single column labeled 0
Check whether a column contains NaN values or the DataFrame is empty
print("Does it contain NaN: ", df["COLUMN"].isna().values.any() or df.empty)
# Rows that contain nan values
print(df[df.isna().any(axis=1)])
Drop rows with NaN values in the required columns
df = df.dropna(subset=required_columns)
Check whether a column exists in the DataFrame
if column not in train.columns:
    print(f"{column} is not in columns")
Convert to datetime format
df['date'] = pd.to_datetime(df['date'])
# check if pandas.to_datetime is working
print("Datetime: ", df['date'].dtype)
Keep only the wanted columns (drop all others)
df = df[['FECHA_MEDIDA', 'MEDIDA_numeric']]
Drop columns
df = df.drop(columns=['low_fats', 'recyclable'])
Select the rows that match multiple conditions
df[(df["PERFIL"] == "Suleyman") & (df["PROYECTO"] == "OTRAS ACTIVIDADES")]
Strip leading and trailing whitespace from column names
df.columns = df.columns.str.strip()
Strip and lowercase the column names