Saturday, December 25, 2021

Data visualization with Seaborn

Leave a Comment

 An Introduction to data visualization with Seaborn

 

 This is part of my learning journey towards data science through Datacamp.

# Import Matplotlib and Seaborn
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter plot: GDP along the x-axis, number of phones along the y-axis.
# `gdp` and `phones` must already be defined before this snippet runs.
sns.scatterplot(
    x=gdp,
    y=phones,
)
plt.show()

plot

Count plot

# Horizontal count plot: one bar per distinct value in `region`,
# with the categories laid out along the y-axis.
sns.countplot(
    y=region,
)

# Render the figure.
plt.show()

plot

# pandas provides read_csv(); it is not imported anywhere else in this post,
# so import it here to keep the snippet runnable on its own.
import pandas as pd

# Load the CSV file at `csv_filepath` into a DataFrame.
df = pd.read_csv(csv_filepath)

# Count plot of the "Spiders" column, with its categories on the x-axis.
sns.countplot(x="Spiders", data=df)

# Display the plot.
plt.show()

plot

Third variable with Hue

# Import Matplotlib and Seaborn
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter plot of absences (x) against final grade G3 (y),
# with points colored by the "location" column.
sns.scatterplot(
    data=student_data,
    x="absences",
    y="G3",
    hue="location",
)

# Render the figure.
plt.show()

plot

# Same scatter plot, but with an explicit hue order so that
# "Rural" is listed before "Urban".
sns.scatterplot(
    data=student_data,
    x="absences",
    y="G3",
    hue="location",
    hue_order=["Rural", "Urban"],
)

# Render the figure.
plt.show()

plot

It looks like students with higher absences tend to have lower grades in both rural and urban areas.

 

import matplotlib.pyplot as plt
import seaborn as sns

# Map each "location" subgroup value to the color used to draw it.
palette_colors = {
    "Rural": "green",
    "Urban": "blue",
}

# Count plot of "school", split into "location" subgroups that are
# colored according to the mapping above.
sns.countplot(
    data=student_data,
    x="school",
    hue="location",
    palette=palette_colors,
)

# Render the figure.
plt.show()

 plot

Relational plot

# Draw the absences-vs-G3 scatter plot through the figure-level
# relplot() interface rather than scatterplot().
sns.relplot(
    data=student_data,
    x="absences",
    y="G3",
    kind="scatter",
)

# Render the figure.
plt.show()

plot

# One subplot per "study_time" value, arranged side by side in columns.
sns.relplot(
    data=student_data,
    x="absences",
    y="G3",
    kind="scatter",
    col="study_time",
)

# Render the figure.
plt.show()

plot

# One subplot per "study_time" value, stacked in rows instead of columns.
sns.relplot(
    data=student_data,
    x="absences",
    y="G3",
    kind="scatter",
    row="study_time",
)

# Render the figure.
plt.show()

plot

Creating two-factor subplots

# Scatter plot of first-period grade G1 (x) against final grade G3 (y).
sns.relplot(
    data=student_data,
    x="G1",
    y="G3",
    kind="scatter",
)

# Render the figure.
plt.show()

plot

# Split the G1-vs-G3 scatter plot into one column per "schoolsup" value,
# with the "yes" panel drawn before the "no" panel.
sns.relplot(
    data=student_data,
    x="G1",
    y="G3",
    kind="scatter",
    col="schoolsup",
    col_order=["yes", "no"],
)

# Render the figure.
plt.show()

plot

# Two-factor grid: columns follow "schoolsup", rows follow "famsup",
# and both are ordered "yes" then "no".
sns.relplot(
    data=student_data,
    x="G1",
    y="G3",
    kind="scatter",
    col="schoolsup",
    col_order=["yes", "no"],
    row="famsup",
    row_order=["yes", "no"],
)

# Render the figure.
plt.show()

plot

Customizing scatter plots

# Scatter plot of acceleration (x) against mpg (y); the "origin" column
# drives both the point color (hue) and the marker style.
sns.relplot(
    data=mpg,
    x="acceleration",
    y="mpg",
    hue="origin",
    style="origin",
)

# Render the figure.
plt.show()

plot

Cars from the USA tend to accelerate more quickly and get lower miles per gallon compared to cars from Europe and Japan.

Line plotting in relational plotting

# Line plot of mpg (y) over model year (x).
sns.relplot(
    data=mpg,
    x="model_year",
    y="mpg",
    kind="line",
)

# Render the figure.
plt.show()

plot

# Same line plot, with the shaded band set to the standard deviation
# (ci="sd").
sns.relplot(
    data=mpg,
    x="model_year",
    y="mpg",
    kind="line",
    ci="sd",
)

# Render the figure.
plt.show()

plot

Plotting subgroups in line plots

# Line plot of horsepower over model year with one line per "origin".
# markers=True draws a marker at each data point; dashes=False keeps every
# line solid, so the subgroups differ only by color and marker rather than
# by dash pattern. ci=None turns off the confidence band.
sns.relplot(x="model_year", y="horsepower",
            data=mpg, kind="line",
            ci=None, style="origin",
            hue="origin", markers=True,
            dashes=False)

# Show plot
plt.show()
plot

Catplot

# Change the orientation of the plot: putting the categorical variable on
# the y-axis draws the count bars horizontally.
sns.catplot(y="Internet usage", data=survey_data,
            kind="count")

# Show plot
plt.show()

plot

Customizing bar plots

# List of categories from lowest to highest
category_order = ["<2 hours",
                  "2 to 5 hours",
                  "5 to 10 hours",
                  ">10 hours"]

# Bar plot of G3 per study-time category, in the order above.
# ci=None turns off the confidence intervals (no error bars are drawn).
sns.catplot(x="study_time", y="G3",
            data=student_data,
            kind="bar",
            order=category_order,
            ci=None)

# Show plot
plt.show()

plot

Box plot 

# Study-time categories, listed from least to most time.
study_time_order = [
    "<2 hours",
    "2 to 5 hours",
    "5 to 10 hours",
    ">10 hours",
]

# Box plot of G3 for each study-time category, drawn in the order above.
sns.catplot(
    data=student_data,
    x="study_time",
    y="G3",
    kind="box",
    order=study_time_order,
)

# Render the figure.
plt.show()

plot

# Box plot of G3 by internet access, split into "location" subgroups.
# showfliers=False omits the outlier points; it replaces the legacy
# sym="" spelling, which depends on seaborn forwarding the argument to
# matplotlib's boxplot().
sns.catplot(x="internet", y="G3", data=student_data, kind="box",
            showfliers=False, hue="location")

# Show plot
plt.show()

plot

Customizing Seaborn Plots

Changing style and palette

# Use the "whitegrid" figure style for the plots that follow.
sns.set_style("whitegrid")

# Survey answers, ordered from least to most frequent.
category_order = ["Never", "Rarely", "Sometimes", "Often", "Always"]

# Count plot of the "Parents Advice" responses in that order.
sns.catplot(
    data=survey_data,
    x="Parents Advice",
    kind="count",
    order=category_order,
)

# Render the figure.
plt.show()

plot

# Keep the "whitegrid" style and switch the color palette to "RdBu".
sns.set_style("whitegrid")
sns.set_palette("RdBu")

# Survey answers, ordered from least to most frequent.
category_order = ["Never", "Rarely", "Sometimes", "Often", "Always"]

# Count plot of the "Parents Advice" responses in that order.
sns.catplot(
    data=survey_data,
    x="Parents Advice",
    kind="count",
    order=category_order,
)

# Render the figure.
plt.show()

plot

Using a custom palette

# Use the "darkgrid" figure style.
sns.set_style("darkgrid")

# Install a custom two-color palette given as hex codes.
sns.set_palette(
    [
        "#39A7D0",
        "#36ADA4",
    ]
)

# Box plot of the "Age" distribution for each "Gender" value.
sns.catplot(
    data=survey_data,
    x="Gender",
    y="Age",
    kind="box",
)

# Render the figure.
plt.show()

plot

Box plot with subgroups

# Switch the color palette to "Blues".
sns.set_palette("Blues")

# Box plot of "Age" by "Gender", split into subgroups by the
# "Interested in Pets" column. Keep the returned grid object so the
# figure title can be set on it.
g = sns.catplot(
    data=survey_data,
    x="Gender",
    y="Age",
    kind="box",
    hue="Interested in Pets",
)

# Add a title to the figure as a whole.
g.fig.suptitle("Age of Those Interested in Pets vs. Not")

# Render the figure.
plt.show()

plot

Bar plot with subgroups and subplots


 



If You Enjoyed This, Take 5 Seconds To Share It

0 comments: