An Introduction to Data Visualization with Seaborn
These notes are part of my data science learning journey through DataCamp.
# Import Matplotlib and Seaborn
import matplotlib.pyplot as plt
import seaborn as sns
# Scatter plot: GDP along the x-axis, number of phones along the y-axis
sns.scatterplot(
    x=gdp,
    y=phones,
)
# Render the figure
plt.show()
Count plot
# Count plot with the region categories placed on the y-axis
sns.countplot(
    y=region,
)
# Render the figure
plt.show()
# pandas provides read_csv(); it is imported here because no visible import
# earlier in this file brings `pd` into scope
import pandas as pd

# Load the CSV file at `csv_filepath` into a DataFrame
df = pd.read_csv(csv_filepath)
# Count plot of the "Spiders" column, with its categories on the x-axis
sns.countplot(x="Spiders", data=df)
# Display the plot
plt.show()
Third variable with Hue
# Import Matplotlib and Seaborn
import matplotlib.pyplot as plt
import seaborn as sns
# Scatter plot of absences (x) against final grade G3 (y), colored by "location"
sns.scatterplot(
    data=student_data,
    x="absences",
    y="G3",
    hue="location",
)
plt.show()

# Same plot, with the hue levels listed as "Rural" first, then "Urban"
sns.scatterplot(
    data=student_data,
    x="absences",
    y="G3",
    hue="location",
    hue_order=["Rural", "Urban"],
)
plt.show()
It looks like students with higher absences tend to have lower grades in both rural and urban areas.
import matplotlib.pyplot as plt
import seaborn as sns
# Color to use for each "location" subgroup value
palette_colors = dict(Rural="green", Urban="blue")
# Count plot of "school", split into "location" subgroups with those colors
sns.countplot(
    data=student_data,
    x="school",
    hue="location",
    palette=palette_colors,
)
# Render the figure
plt.show()
Relational plot
# Figure-level scatter plot: relplot() with kind="scatter" in place of scatterplot()
sns.relplot(
    data=student_data,
    x="absences",
    y="G3",
    kind="scatter",
)
plt.show()

# One subplot per "study_time" value, laid out as columns
sns.relplot(
    data=student_data,
    x="absences",
    y="G3",
    kind="scatter",
    col="study_time",
)
plt.show()

# Same subplots, laid out as rows instead of columns
sns.relplot(
    data=student_data,
    x="absences",
    y="G3",
    kind="scatter",
    row="study_time",
)
plt.show()
Creating two-factor subplots
# Create a scatter plot of G1 vs. G3
# (kind="scatter" belongs inside the call; left dangling on its own line it
# is a syntax error)
sns.relplot(x="G1", y="G3", data=student_data,
            kind="scatter")
# Show plot
plt.show()

# Adjust to add subplots based on school support
# (col_order is likewise passed inside the call so the columns appear as
# "yes" then "no")
sns.relplot(x="G1", y="G3", data=student_data,
            kind="scatter", col="schoolsup",
            col_order=["yes", "no"])
# Show plot
plt.show()

# Adjust further to add subplots based on family support
sns.relplot(x="G1", y="G3",
            data=student_data,
            kind="scatter",
            col="schoolsup",
            col_order=["yes", "no"],
            row="famsup",
            row_order=["yes", "no"])
# Show plot
plt.show()
Customizing scatter plot
# Acceleration (x) against mpg (y); "origin" drives both point color and point style
sns.relplot(
    data=mpg,
    x="acceleration",
    y="mpg",
    hue="origin",
    style="origin",
)
# Render the figure
plt.show()
Cars from the USA tend to accelerate more quickly and get lower miles per gallon compared to cars from Europe and Japan.
Line plots with relplot()
# Line plot of mpg against model year
sns.relplot(
    data=mpg,
    x="model_year",
    y="mpg",
    kind="line",
)
plt.show()

# Same line plot with ci="sd"
# NOTE(review): newer seaborn releases replace `ci` with `errorbar` —
# confirm against the installed version
sns.relplot(
    data=mpg,
    x="model_year",
    y="mpg",
    kind="line",
    ci="sd",
)
plt.show()
Plotting subgroups in line plots
# Add markers and make each line have the same style.
# style="origin" would otherwise give each origin its own dash pattern;
# dashes=False keeps every line solid, so subgroups differ only by color
# and marker.
sns.relplot(x="model_year", y="horsepower",
            data=mpg, kind="line",
            ci=None, style="origin",
            hue="origin", markers=True,
            dashes=False)
# Show plot
plt.show()
Catplot
# Change the orientation of the plot: putting the categorical variable on
# the y-axis (instead of x) makes the count bars run horizontally
sns.catplot(y="Internet usage", data=survey_data,
            kind="count")
# Show plot
plt.show()
Customizing bar plots
# List of categories from lowest to highest
category_order = ["<2 hours",
                  "2 to 5 hours",
                  "5 to 10 hours",
                  ">10 hours"]
# Turn off the confidence intervals: ci=None removes the error bars from
# the bar plot (same `ci` keyword the line plots in this file use)
sns.catplot(x="study_time", y="G3",
            data=student_data,
            kind="bar",
            order=category_order,
            ci=None)
# Show plot
plt.show()
Box plot
# Study-time categories, listed in the order they should appear on the x-axis
study_time_order = [
    "<2 hours",
    "2 to 5 hours",
    "5 to 10 hours",
    ">10 hours",
]
# Box plot of G3 per study-time category, using that ordering
sns.catplot(
    data=student_data,
    x="study_time",
    y="G3",
    kind="box",
    order=study_time_order,
)
# Render the figure
plt.show()
# Create a box plot with subgroups and omit the outliers.
# showfliers=False is the documented switch for hiding outlier points; the
# previous sym="" only worked as a pass-through to matplotlib's boxplot().
sns.catplot(x="internet", y="G3", data=student_data, kind="box",
            showfliers=False, hue="location")
# Show plot
plt.show()
Customizing Seaborn Plots
Changing style and palette
# Use the "whitegrid" style for the plots that follow
sns.set_style("whitegrid")
# Survey answers in the order they should appear on the x-axis
category_order = [
    "Never",
    "Rarely",
    "Sometimes",
    "Often",
    "Always",
]
# Count plot of the "Parents Advice" responses
sns.catplot(
    data=survey_data,
    x="Parents Advice",
    kind="count",
    order=category_order,
)
# Render the figure
plt.show()
# Keep the "whitegrid" style and switch the color palette to "RdBu"
sns.set_style("whitegrid")
sns.set_palette("RdBu")
# Survey answers in the order they should appear on the x-axis
category_order = [
    "Never",
    "Rarely",
    "Sometimes",
    "Often",
    "Always",
]
# Count plot of the "Parents Advice" responses
sns.catplot(
    data=survey_data,
    x="Parents Advice",
    kind="count",
    order=category_order,
)
# Render the figure
plt.show()
Using a custom palette
# Use the "darkgrid" style for the plots that follow
sns.set_style("darkgrid")
# Custom palette made of two hex colors
sns.set_palette(
    [
        "#39A7D0",
        "#36ADA4",
    ]
)
# Box plot of "Age" for each "Gender" value
sns.catplot(
    data=survey_data,
    x="Gender",
    y="Age",
    kind="box",
)
# Render the figure
plt.show()
Box plot with subgroups
# Switch the color palette to "Blues"
sns.set_palette("Blues")
# Box plot of "Age" per "Gender", split into "Interested in Pets" subgroups;
# the returned object is kept in `g` so a title can be added to it
g = sns.catplot(
    data=survey_data,
    x="Gender",
    y="Age",
    kind="box",
    hue="Interested in Pets",
)
# Figure-level title
g.fig.suptitle("Age of Those Interested in Pets vs. Not")
# Render the figure
plt.show()
0 comments:
Post a Comment