Data Visualization exercises notebook
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Fetch seaborn's example "iris" dataset into a DataFrame and preview
# its first five rows (5 is also the default for head()).
iris = sns.load_dataset('iris')
iris.head(n=5)
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
Plot a histogram of the sepal length of the iris flowers.
# Distribution of sepal length over all flowers; the bin count is left
# to seaborn's default.
sepal_length = iris['sepal_length']
sns.histplot(sepal_length)
plt.show()
Plot a bar chart of the mean values of each of the four features for each species.
# Average every remaining column within each species group.
means = iris.groupby('species').mean()

# Grouped bar chart: one cluster of bars per species, one bar per column.
mean_axes = means.plot.bar()
mean_axes.set_xlabel('Species')
mean_axes.set_ylabel('Mean Value')
plt.show()
Create a scatter plot of the sepal length and width, with different colors for different species.
# Sepal length against sepal width, one point per flower, coloured by species.
sns.scatterplot(
    data=iris,
    x='sepal_length',
    y='sepal_width',
    hue='species',
)
plt.show()
Create a boxplot of the petal width for each species.
# One box per species summarising the spread of petal width.
sns.boxplot(data=iris, y='petal_width', x='species')
plt.show()
Create a violin plot of the sepal length, with different colors for different species.
# Violin plot of the sepal length, one violin per species, with a
# different colour for each species.
# `hue` repeats the x variable only to obtain per-species colours.
# dodge=False keeps every violin centred on its own tick; with dodging
# enabled, seaborn reserves a slot per hue level inside each x category,
# which leaves the violins thin and off-centre.
sns.violinplot(
    x='species',
    y='sepal_length',
    hue='species',
    inner='quartile',  # draw the quartiles as lines inside each violin
    dodge=False,
    data=iris,
)
plt.show()
Create a violin plot of the sepal width, with different colors for different species.
# Violin plot of the sepal width, one violin per species, with a
# different colour for each species.
# `hue` repeats the x variable only to obtain per-species colours.
# dodge=False keeps every violin centred on its own tick instead of being
# squeezed into a per-hue slot.
sns.violinplot(
    x='species',
    y='sepal_width',
    hue='species',
    inner='quartile',  # draw the quartiles as lines inside each violin
    dodge=False,
    data=iris,
)
# Render the figure explicitly, as the other plots in this file do, so the
# Axes repr is not echoed and the figure does not stay current for the
# next plotting call.
plt.show()
<AxesSubplot:xlabel='species', ylabel='sepal_width'>
Load the diamonds dataset.
# Fetch seaborn's example "diamonds" dataset into a DataFrame and preview
# its first five rows (5 is also the default for head()).
diamonds = sns.load_dataset('diamonds')
diamonds.head(n=5)
carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
1 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
2 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
3 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 |
4 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 |
Create a scatter plot with x-axis as 'carat' and y-axis as 'price' from the diamonds dataset.
# Re-import the plotting libraries and reload the dataset, so `plt`, `sns`
# and `diamonds` are (re)bound here before plotting.
import matplotlib.pyplot as plt
import seaborn as sns

diamonds = sns.load_dataset('diamonds')

# The 'carat' and 'price' strings are looked up as columns of `diamonds`
# through matplotlib's data= argument.
plt.scatter(x='carat', y='price', data=diamonds)
plt.gca().set(xlabel='Carat', ylabel='Price')
plt.show()
# Price against carat, restricted to diamonds whose clarity is "IF".
# x and y are passed by keyword: seaborn deprecated passing them
# positionally, and from version 0.12 doing so is an error.
sns.scatterplot(
    x='carat',
    y='price',
    hue='clarity',
    data=diamonds[diamonds.clarity == "IF"],
)
plt.xlabel('Carat')
plt.ylabel('Price')
plt.title("Price vs. Carat for IF Diamonds")
plt.show()
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
# Price against carat, restricted to diamonds whose clarity is "I1".
# x and y are passed by keyword: seaborn deprecated passing them
# positionally, and from version 0.12 doing so is an error.
sns.scatterplot(
    x='carat',
    y='price',
    hue='clarity',
    data=diamonds[diamonds.clarity == "I1"],
)
plt.xlabel('Carat')
plt.ylabel('Price')
plt.title("Price vs. Carat for I1 Diamonds")
plt.show()
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
Create a histogram of the 'price' column with 30 bins. Add a line that represents the mean price in red.
# Price distribution in 30 bins.
plt.hist(x=diamonds['price'], bins=30)

# Mark the mean price with a red vertical line.
plt.axvline(x=diamonds['price'].mean(), color='red')

plt.title('Histogram of diamond prices')
plt.xlabel('Price')
plt.ylabel('Frequency')
plt.show()
Create a bar plot of the count of diamonds for each 'cut' category.
# Number of diamonds in each cut category, drawn as one bar per category.
cut_axes = sns.countplot(data=diamonds, x='cut')
cut_axes.set_xlabel('Cut')
cut_axes.set_ylabel('Count')
plt.show()
Create a box plot of the 'price' column for each 'cut' category.
# Price spread within each cut category, one box per category.
sns.boxplot(data=diamonds, y='price', x='cut')
plt.title('Diamond Price by Cut')
plt.ylabel('Price')
plt.xlabel('Cut')
plt.show()
Create a heat map using seaborn to show the correlation between all numeric columns in the diamonds dataset.
# Create a correlation matrix from the numeric columns only.
# The frame also holds non-numeric columns (cut, color, clarity). Older
# pandas dropped them silently in corr(), but pandas 2.x raises instead,
# so they are filtered out explicitly here.
numeric_columns = diamonds.select_dtypes(include='number')
corr = numeric_columns.corr()

# Create a heat map, writing each coefficient inside its cell.
sns.heatmap(corr, cmap='coolwarm', annot=True)
plt.title('Correlation Heat Map')
plt.show()
Create a line plot of the average 'price' per 'carat' for each 'cut' category.
# Draw one line per cut category: the mean price at each distinct carat
# value among the diamonds of that cut. Each cut is aggregated separately
# so its line only spans carat values that occur for that cut, with no
# NaN gaps.
# observed=True skips cut categories that have no rows.
for cut_name, cut_rows in diamonds.groupby('cut', observed=True):
    avg_price_by_carat = cut_rows.groupby('carat')['price'].mean()
    plt.plot(
        avg_price_by_carat.index,
        avg_price_by_carat.values,
        label=cut_name,
    )
plt.xlabel('Carat')
plt.ylabel('Average Price')
plt.title('Average Price by Carat for Each Cut')
plt.legend(title='Cut')
plt.show()
Create a stacked bar chart of the count of diamonds by cut and color.
# Count the non-null prices for every (cut, color) pair, then pivot color
# into columns so each row is a cut and each column a color.
diamonds_by_cut_color = (
    diamonds
    .groupby(['cut', 'color'])['price']
    .count()
    .unstack()
)

# One bar per cut, with the per-color counts stacked on top of each other.
diamonds_by_cut_color.plot.bar(stacked=True)
plt.title('Count of Diamonds by Cut and Color')
plt.xlabel('Cut')
plt.ylabel('Count')
plt.show()
Create a histogram of the carat weight distribution of diamonds with different color grades.
# Overlay one semi-transparent, density-normalised carat histogram for
# each of the colour grades D, E and F. Each call uses matplotlib's
# default binning for its own data.
for grade in ('D', 'E', 'F'):
    grade_rows = diamonds[diamonds['color'] == grade]
    plt.hist(grade_rows['carat'], alpha=0.5, label=grade, density=True)

plt.title('Carat Weight Distribution by Color Grade')
plt.xlabel('Carat')
plt.ylabel('Density')
plt.legend()
plt.show()