Data Visualization exercices notebook

In [ ]:

Copied!





import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# load the iris dataset
iris = sns.load_dataset('iris')
iris.head()
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# load the iris dataset
iris = sns.load_dataset('iris')
iris.head()

Out[ ]:

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

Plot a histogram of the sepal length of the iris flowers.

In [ ]:

Copied!

# plot a histogram of the sepal length
sns.histplot(iris['sepal_length'])
plt.show()
# plot a histogram of the sepal length
sns.histplot(iris['sepal_length'])
plt.show()

No description has been provided for this image

Plot a bar chart of the mean values of each of the four features for each species.

In [ ]:

Copied!





# calculate the mean values of each of the four features for each species
means = iris.groupby('species').mean()

# plot a bar chart of the means
means.plot(kind='bar')
plt.xlabel('Species')
plt.ylabel('Mean Value')
plt.show()
# calculate the mean values of each of the four features for each species
means = iris.groupby('species').mean()

# plot a bar chart of the means
means.plot(kind='bar')
plt.xlabel('Species')
plt.ylabel('Mean Value')
plt.show()

Create a scatter plot of the sepal length and width, with different colors for different species.

In [ ]:

Copied!

# create a scatter plot of the sepal length and width, with different colors for different species
sns.scatterplot(x='sepal_length', y='sepal_width', hue='species', data=iris)
plt.show()
# create a scatter plot of the sepal length and width, with different colors for different species
sns.scatterplot(x='sepal_length', y='sepal_width', hue='species', data=iris)
plt.show()

Create a boxplot of the petal width for each species.

In [ ]:

Copied!

# create a boxplot of the petal width for each species
sns.boxplot(x='species', y='petal_width', data=iris)
plt.show()
# create a boxplot of the petal width for each species
sns.boxplot(x='species', y='petal_width', data=iris)
plt.show()

Create a violin plot of the sepal length, with different colors for different species.

In [ ]:

Copied!

# create a violin plot of the sepal length and width, with different colors for different species
sns.violinplot(x='species', y='sepal_length', hue='species', inner='quartile', data=iris)
plt.show()
# create a violin plot of the sepal length and width, with different colors for different species
sns.violinplot(x='species', y='sepal_length', hue='species', inner='quartile', data=iris)
plt.show()

Create a violin plot of the sepal width, with different colors for different species.

In [ ]:

Copied!

sns.violinplot(x='species', y='sepal_width', hue='species', inner='quartile', data=iris)
sns.violinplot(x='species', y='sepal_width', hue='species', inner='quartile', data=iris)

Out[ ]:

<AxesSubplot:xlabel='species', ylabel='sepal_width'>

Load dataset diamonds

In [ ]:

Copied!

diamonds = sns.load_dataset('diamonds')
diamonds.head()
diamonds = sns.load_dataset('diamonds')
diamonds.head()

Out[ ]:

	carat	cut	color	clarity	depth	table	price	x	y	z
0	0.23	Ideal	E	SI2	61.5	55.0	326	3.95	3.98	2.43
1	0.21	Premium	E	SI1	59.8	61.0	326	3.89	3.84	2.31
2	0.23	Good	E	VS1	56.9	65.0	327	4.05	4.07	2.31
3	0.29	Premium	I	VS2	62.4	58.0	334	4.20	4.23	2.63
4	0.31	Good	J	SI2	63.3	58.0	335	4.34	4.35	2.75

Create a scatter plot with x-axis as 'carat' and y-axis as 'price' from the diamonds dataset.

In [ ]:

Copied!





import matplotlib.pyplot as plt
import seaborn as sns

diamonds = sns.load_dataset('diamonds')

plt.scatter(x='carat', y='price', data=diamonds)
plt.xlabel('Carat')
plt.ylabel('Price')
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns

diamonds = sns.load_dataset('diamonds')

plt.scatter(x='carat', y='price', data=diamonds)
plt.xlabel('Carat')
plt.ylabel('Price')
plt.show()

In [ ]:

Copied!





sns.scatterplot('carat', 'price', hue='clarity',  data=diamonds[diamonds.clarity=="IF"])
plt.xlabel('Carat')
plt.ylabel('Price')
plt.title("Price vs. Carat for IF Diamonds")
plt.show()
sns.scatterplot('carat', 'price', hue='clarity',  data=diamonds[diamonds.clarity=="IF"])
plt.xlabel('Carat')
plt.ylabel('Price')
plt.title("Price vs. Carat for IF Diamonds")
plt.show()

/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(

In [ ]:

Copied!





sns.scatterplot('carat', 'price', hue='clarity',  data=diamonds[diamonds.clarity=="I1"])
plt.xlabel('Carat')
plt.ylabel('Price')
plt.title("Price vs. Carat for I1 Diamonds")
plt.show()
sns.scatterplot('carat', 'price', hue='clarity',  data=diamonds[diamonds.clarity=="I1"])
plt.xlabel('Carat')
plt.ylabel('Price')
plt.title("Price vs. Carat for I1 Diamonds")
plt.show()

/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(

Create a histogram of the 'price' column with 30 bins. Add a line that represents the mean price in red.

In [ ]:

Copied!





plt.hist(diamonds['price'], bins=30)
plt.axvline(diamonds.price.mean(), color='red')
plt.xlabel('Price')
plt.ylabel('Frequency')
plt.title('Histogram of diamond prices')
plt.show()
plt.hist(diamonds['price'], bins=30)
plt.axvline(diamonds.price.mean(), color='red')
plt.xlabel('Price')
plt.ylabel('Frequency')
plt.title('Histogram of diamond prices')
plt.show()

Create a bar plot of the count of diamonds for each 'cut' category.

In [ ]:

Copied!





# Create a bar chart
sns.countplot(x='cut', data=diamonds)
plt.xlabel('Cut')
plt.ylabel('Count')
plt.show()
# Create a bar chart
sns.countplot(x='cut', data=diamonds)
plt.xlabel('Cut')
plt.ylabel('Count')
plt.show()

Create a box plot of the 'price' column for each 'cut' category.

In [ ]:

Copied!





# Create a box plot
sns.boxplot(x='cut', y='price', data=diamonds)
plt.xlabel('Cut')
plt.ylabel('Price')
plt.title('Diamond Price by Cut')
plt.show()
# Create a box plot
sns.boxplot(x='cut', y='price', data=diamonds)
plt.xlabel('Cut')
plt.ylabel('Price')
plt.title('Diamond Price by Cut')
plt.show()

Create a heat map using seaborn to show the correlation between all numeric columns in the diamonds dataset.

In [ ]:

Copied!





# Create a correlation matrix
corr = diamonds.corr()

# Create a heat map
sns.heatmap(corr, cmap='coolwarm', annot=True)
plt.title('Correlation Heat Map')
plt.show()
# Create a correlation matrix
corr = diamonds.corr()

# Create a heat map
sns.heatmap(corr, cmap='coolwarm', annot=True)
plt.title('Correlation Heat Map')
plt.show()

Create a line plot of the average 'price' per 'carat' for each 'cut' category.

In [ ]:

Copied!





avg_price_by_carat = diamonds.groupby('carat')['price'].mean()
plt.plot(avg_price_by_carat.index, avg_price_by_carat.values)
plt.xlabel('Carat')
plt.ylabel('Average Price')
plt.title('Average Price by Carat')
plt.show()
avg_price_by_carat = diamonds.groupby('carat')['price'].mean()
plt.plot(avg_price_by_carat.index, avg_price_by_carat.values)
plt.xlabel('Carat')
plt.ylabel('Average Price')
plt.title('Average Price by Carat')
plt.show()

Create a stacked bar chart of the count of diamonds by cut and color.

In [ ]:

Copied!





diamonds_by_cut_color = diamonds.groupby(['cut', 'color'])['price'].count().unstack()
diamonds_by_cut_color.plot(kind='bar', stacked=True)
plt.xlabel('Cut')
plt.ylabel('Count')
plt.title('Count of Diamonds by Cut and Color')
plt.show()
diamonds_by_cut_color = diamonds.groupby(['cut', 'color'])['price'].count().unstack()
diamonds_by_cut_color.plot(kind='bar', stacked=True)
plt.xlabel('Cut')
plt.ylabel('Count')
plt.title('Count of Diamonds by Cut and Color')
plt.show()

Create a histogram of the carat weight distribution of diamonds with different color grades.

In [ ]:

Copied!





plt.hist(diamonds[diamonds['color'] == 'D']['carat'], alpha=0.5, label='D', density=True)
plt.hist(diamonds[diamonds['color'] == 'E']['carat'], alpha=0.5, label='E', density=True)
plt.hist(diamonds[diamonds['color'] == 'F']['carat'], alpha=0.5, label='F', density=True)
plt.xlabel('Carat')
plt.ylabel('Density')
plt.title('Carat Weight Distribution by Color Grade')
plt.legend()
plt.show()
plt.hist(diamonds[diamonds['color'] == 'D']['carat'], alpha=0.5, label='D', density=True)
plt.hist(diamonds[diamonds['color'] == 'E']['carat'], alpha=0.5, label='E', density=True)
plt.hist(diamonds[diamonds['color'] == 'F']['carat'], alpha=0.5, label='F', density=True)
plt.xlabel('Carat')
plt.ylabel('Density')
plt.title('Carat Weight Distribution by Color Grade')
plt.legend()
plt.show()

In [ ]: