Basic Drawing Operations of Seaborn Library
# Seaborn visualization template
# Import libraries
import warnings

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Set a Chinese-capable font list together with the whitegrid theme
sns.set_style('whitegrid', {'font.sans-serif': ['simhei', 'Arial']})

# Ignore warnings
warnings.filterwarnings('ignore')

# Load data
# NOTE(review): '.../data.csv' looks like a placeholder path - point it at the real CSV file
data = pd.read_csv('.../data.csv')
1, seaborn's drawing style
1. Theme Style
There are five preset themes in the seaborn library: darkgrid (gray background + white grid), whitegrid (white background + black grid), dark (gray background only), white (white background only), and ticks (axes with tick marks).
Personally recommend whitegrid or white
The darkgrid and whitegrid themes make it easier to read quantitative information off a plot
The dark and white themes help prevent confusion between grids and lines representing data
The ticks theme adds tick marks to the axes, which gives the plot a little extra structure
set_style() modifies the theme. The default theme is darkgrid
sns.set_style( style = None , rc = None)
The set_style function can only modify the parameters displayed by the axes_style function. The axes_style function can also be used (in a with statement) to set the graphic style temporarily.
# Demo data: three parallel lines that share the same x values
x = np.arange(1, 10, 2)
y1 = x + 1
y2 = x + 3
y3 = x + 5


def showLine(flip=1):
    """Draw the three demo lines (y1, y2, y3 against x) on the current axes.

    ``flip`` is kept only for backward compatibility; the function never
    reads it.
    """
    # x and y are passed by keyword: seaborn >= 0.12 accepts only ``data``
    # as a positional argument.
    for y in (y1, y2, y3):
        sns.lineplot(x=x, y=y)


pic = plt.figure(figsize=(12, 8))

# Used in a with statement, axes_style() applies a theme only to the axes
# created inside the block, so each subplot gets its own theme.
preset_themes = ('darkgrid', 'whitegrid', 'dark', 'white', 'ticks')
for position, theme in enumerate(preset_themes, start=1):
    with sns.axes_style(theme):
        pic.add_subplot(2, 3, position)
        showLine()
        plt.title(theme)

# Modify parameters in the theme: set_style() changes the style for every
# axes created from here on.
sns.set_style(style='darkgrid',
              rc={'font.sans-serif': ['MicrosoftYaHei', 'SimHei'],
                  'grid.color': 'black'})
pic.add_subplot(2, 3, 6)
showLine()
plt.title('modify parameters')
plt.show()
2. Element Scaling
set_context() sets the scale of the elements in the output graph; its context parameter is None by default
The context parameter can accept paper, notebook, talk, and poster types
paper < notebook < talk < poster
The set_context function can only modify the parameters displayed by the plotting_context function. The plotting_context function changes the size of labels, lines and other elements in the graph by adjusting parameters, but does not affect the overall style.
sns.set_context( context = None , font_scale = 1, rc = None )
# Restore default parameters
sns.set()

# Demo data: three parallel lines that share the same x values
x = np.arange(1, 10, 2)
y1 = x + 1
y2 = x + 3
y3 = x + 5


def showLine(flip=1):
    """Draw the three demo lines (y1, y2, y3 against x) on the current axes.

    ``flip`` is kept only for backward compatibility; the function never
    reads it.
    """
    # x and y are passed by keyword: seaborn >= 0.12 accepts only ``data``
    # as a positional argument.
    for y in (y1, y2, y3):
        sns.lineplot(x=x, y=y)


# Create the canvas once; a second plt.figure() call would leave an extra
# empty figure behind.
pic = plt.figure(figsize=(8, 8), dpi=100)

# Used in a with statement, plotting_context() applies a context only to
# the axes created inside the block.
contexts = ('paper', 'notebook', 'talk', 'poster')
for position, context in enumerate(contexts, start=1):
    with sns.plotting_context(context):
        pic.add_subplot(2, 2, position)
        showLine()
        plt.title(context)
plt.show()
3. Border control
despine() can remove the border at any position, adjust the position of the border, and trim the length of the border
sns.despine( fig = None , ax = None , top = True , right = True , left = False , bottom = False , offset = None , trim = False )
Parameter | Description |
---|---|
top | Receive boolean, which means to delete the top border. Default to True |
right | Receive boolean, which means to delete the right border. Default to True |
left | Receive boolean, which means to delete the left border. Default is False |
bottom | Receive boolean, which means to delete the bottom border. Default is False |
offset | Receive int or dict, indicating the distance between the frame and the coordinate axis. The default is None |
trim | Receive boolean; if True, each border is limited to the smallest and largest major tick on each non-despined axis. Default is False |
# despine() called without arguments
with sns.axes_style('white'):
    showLine()
    # With its default arguments despine() deletes the top and right borders
    sns.despine()
    plt.title('Control drawing borders')
plt.show()
# Move the borders a distance of 10 away from the axes
with sns.axes_style('white'):
    # 20 x 6 normal samples; column k is shifted up by k / 2
    column_shift = np.arange(6) / 2
    data = np.random.normal(size=(20, 6)) + column_shift
    sns.boxplot(data=data)
    sns.despine(offset=10, left=False, bottom=False)
    plt.title('Control drawing borders')
plt.show()
2, Draw a diagram
1. Scatterplot - sns.scatterplot()
# Load data
hr = pd.read_csv('../data/hr.csv', encoding='gbk')

# Keep the rows whose department is the Product Development Department and
# whose quit flag is 1
in_product_dept = hr['department'].values == 'Product Development Department'
has_quit = hr['quit'].values == 1
product = hr.iloc[in_product_dept & has_quit, :]

ax = sns.scatterplot(
    x='score',
    y='Average working hours per month (hours)',
    data=product,
)
plt.title('Scatter Chart of Evaluation Score and Average Working Time')
plt.show()
# Highlight categories by colouring the points and changing their markers
# NOTE(review): the keys presumably have to match the values found in the
# 'salary' column - confirm that 'in' is one of them.
markers = {
    'low': 'o',
    'in': 'D',
    'high': 's',
}
sns.scatterplot(
    x='score',
    y='Average working hours per month (hours)',
    hue='salary',
    style='salary',
    markers=markers,
    data=product,
)
plt.title('Scatter Chart of Evaluation Score and Average Working Time')
plt.show()
2. Line Chart - sns.lineplot()
Main parameters:
estimator: accepts the name of a pandas method, a callable, or None. It specifies how the y values that share the same x value are aggregated. The default is mean
ci: accepts an int, 'sd', or None. It gives the size of the confidence interval drawn around the value aggregated by the estimator parameter; 'sd' draws the standard deviation of the data instead, and None draws no interval. Default is 95
# Draw a line chart of the number of rooms and the house price
boston = pd.read_csv('../data/boston_house_prices.csv', encoding='gbk')

# ci=None switches the confidence band off; ci=0 would still run the
# bootstrap only to draw a zero-width band.
sns.lineplot(
    x='Number of rooms',
    y='House price (thousand dollars)',
    data=boston,
    ci=None,
)
plt.title('Number of rooms and house price')
plt.show()
# Draw a line chart of seniority and scoring for the IT Department
IT = hr.iloc[hr['department'].values == 'IT Department', :]

# ci=None switches the confidence band off; ci=0 would still run the
# bootstrap only to draw a zero-width band.
sns.lineplot(
    x='Length of service (years)',
    y='score',
    hue='quit',
    data=IT,
    ci=None,
)
# Double quotes are required: the title contains an apostrophe.
plt.title("Length of service and previous year's evaluation")
plt.show()
3. Heatmap - heatmap()
plt.rcParams['axes.unicode_minus'] = False
# Optional larger canvas (left disabled): plt.figure(figsize=(16, 12))

# Correlation coefficient matrix of the features
corr = boston.corr()
sns.heatmap(data=corr)
plt.title('Characteristic matrix thermodynamic diagram')
plt.show()
# Add data labels: write each coefficient into its cell with two decimals
plt.figure(figsize=(10, 10))
sns.heatmap(data=corr, annot=True, fmt='.2f')
plt.title('Characteristic matrix thermodynamic diagram')
plt.show()
4. Matrix Grid - PairGrid()
PairGrid() can be used to draw a grid graph of the degree of data correlation
PairGrid() maps each variable in the dataset onto the rows and columns of a grid of subplots. Different drawing functions can be used for the upper and lower triangles to draw bivariate plots that show the relationship between each pair of variables in the dataset.
# Draw a correlation grid between crime rate, nitric oxide content, number
# of rooms and house price
grid_columns = [
    'crime rate',
    'Nitric oxide content( ppm)',
    'Number of rooms',
    'House price (thousand dollars)',
]
# A scatter plot is drawn in every cell of the grid
g = sns.PairGrid(boston, vars=grid_columns).map(plt.scatter)
plt.suptitle('Matrix Grid', verticalalignment='bottom', y=1)
plt.show()
# Draw subsets of the data in different colours
in_sales_dept = hr['department'].values == 'Sales Department'
has_quit = hr['quit'].values == 1
sell = hr.iloc[in_sales_dept & has_quit, :]

g = sns.PairGrid(
    sell,
    vars=['Satisfaction', 'score', 'Average working hours per month (hours)'],
    hue='salary',
    palette='Set3',
)
# Density curves on the diagonal, scatter plots in every other cell
g = g.map_diag(sns.kdeplot).map_offdiag(plt.scatter)
plt.suptitle('Matrix grid with different colors', verticalalignment='bottom', y=1)
plt.show()
5. Relational Grid Composition Diagram - relplot()
relplot() can uniformly access the scatterplot function and lineplot function to draw the relational grid composite graph
# Draw a single-panel scatter diagram
sns.relplot(x='Satisfaction', y='score', hue='salary', data=sell)
# Double quotes are required: the title contains an apostrophe.
plt.title("Satisfaction level and last year's evaluation")
plt.show()
# Draw grid diagrams
# Arguments shared by both grids
shared_args = dict(
    x='Satisfaction',
    y='score',
    hue='5 Promotion within the year',
    data=IT,
)

# Facet by 'salary' (rows) and by 'Work accident' (columns)
sns.relplot(row='salary', col='Work accident', **shared_args)
plt.show()

# Facet by 'Work accident' only, wrapping after every single column
sns.relplot(col='Work accident', col_wrap=1, **shared_args)
plt.show()
3, Draw classification map
1. Bar Chart - barplot()
import math

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Load data
# NOTE(review): absolute, machine-specific paths; the other sections of
# this tutorial load the same files from '../data/'.
boston = pd.read_csv('E:/desktop/source code+experimental data /Chapter 4/data/boston_house_prices.csv', encoding='gbk')
hr = pd.read_csv('E:/desktop/source code+experimental data /Chapter 4/data/hr.csv', encoding='gbk')

# Drawing with the seaborn library
sns.set_style('whitegrid', {'font.sans-serif': ['simhei', 'Arial']})
# Set Chinese font
plt.rcParams['font.sans-serif'] = ['SimHei']
# Display the minus sign normally
plt.rcParams['axes.unicode_minus'] = False

# Draw a bar chart of the total number of personnel in each department
count = hr['department'].value_counts()
index = count.index
# Departments go on the x axis and head counts on the y axis so that the
# data agrees with the axis labels and the rotated x tick labels below.
sns.barplot(x=index, y=count.values)
plt.xticks(rotation=70)
plt.xlabel('department')
plt.ylabel('total')
plt.title('Comparison of the number of people in each department')
plt.show()
2. Count Graph - countplot()
# Draw count charts with the data shown on the x axis and on the y axis
plt.figure(figsize=(8, 4))

# Canvas split into one row and two columns; this is the first subplot
plt.subplot(1, 2, 1)
sns.countplot(x='Length of service (years)', data=hr)
plt.title('x Counting chart of axis display data')
plt.ylabel('count')

# Same canvas layout; this is the second subplot
plt.subplot(1, 2, 2)
sns.countplot(y='Length of service (years)', data=hr)
plt.title('y Counting chart of axis display data')
plt.xlabel('count')

plt.show()
# Draw a count chart with nested categories: one bar per salary level
# inside each '5 Promotion within the year' group
sns.countplot(
    x='5 Promotion within the year',
    hue='salary',
    data=hr,
    palette='Set2',
)
plt.suptitle('Multivariate Scatter Chart')
plt.ylabel('total')
plt.show()
3. Draw univariate distribution (histogram) - distplot()
# Draw the univariate distribution of the property tax as a histogram
# (kde=False: no density curve)
# NOTE(review): distplot is, to my knowledge, deprecated in recent seaborn
# releases - confirm against the installed version.
property_tax = boston['property tax']
sns.distplot(property_tax, kde=False)
plt.title('Distribution map of single variable')
plt.ylabel('quantity')
plt.show()
4. Draw classification scatter plots - stripplot(), swarmplot()
(1)stripplot( )
stripplot() accepts data in several forms, including lists, NumPy arrays, DataFrames, Series, arrays, or vectors.
Main parameters:
jitter: it means adding uniform random noise (only changing the graph) to optimize the graph display. The default is True.
# Draw a simple horizontal distribution scatter plot
in_sales_dept = hr['department'].values == 'Sales Department'
has_quit = hr['quit'].values == 1
sale = hr.iloc[in_sales_dept & has_quit, :]

working_hours = sale['Average working hours per month (hours)']
sns.stripplot(x=working_hours)
plt.title('Simple Horizontal Scatter Chart')
plt.show()
# Compare the plot with and without random noise (jitter)
hr1 = hr.iloc[hr['quit'].values == 1, :]
plt.figure(figsize=(10, 5))

# Left subplot: random noise is added by default
plt.subplot(1, 2, 1)
plt.xticks(rotation=70)
sns.stripplot(x='department', y='Average working hours per month (hours)',
              data=hr1)
plt.title('Default random noise jitter')

# Right subplot: no random noise added
plt.subplot(1, 2, 2)
plt.xticks(rotation=70)
sns.stripplot(x='department', y='Average working hours per month (hours)',
              data=hr1, jitter=False)
plt.title('No random noise jitter')

plt.show()
# Show a second classification condition through colour
high_salary = hr['salary'].values == 'high'
not_quit = hr['quit'].values == 0
hr2 = hr.iloc[high_salary & not_quit, :]

sns.stripplot(
    x='5 Promotion within the year',
    y='Average working hours per month (hours)',
    hue='department',
    data=hr2,
    jitter=True,
)
plt.title('Promotion in the first five years and average monthly working hours')
plt.show()
# Separate the hue levels along the categorical axis
# NOTE(review): a 70-inch-wide figure looks unintended - confirm the size.
plt.figure(figsize=(70, 13))
# Arguments shared by both subplots
strip_args = dict(
    x='department',
    y='Average working hours per month (hours)',
    hue='5 Promotion within the year',
    data=hr2,
)

# Top subplot: hue levels drawn on top of each other
plt.subplot(2, 1, 1)
plt.xticks(rotation=70)
plt.title('Average monthly working hours of different departments')
sns.stripplot(**strip_args)

# Bottom subplot: dodge=True puts each hue level in its own strip
plt.subplot(2, 1, 2)
plt.xticks(rotation=70)
sns.stripplot(dodge=True, **strip_args)

plt.show()
(2)swarmplot( )
After adding random noise to the stripplot function to increase the pattern jitter and drawing variables along the classification axis, there is still the possibility of overlap.
This can be avoided by using the swarmplot function, which can draw a classified scatter plot with non overlapping points.
# Draw a simple distribution-density scatter plot
sns.swarmplot(
    x='department',
    y='Average working hours per month (hours)',
    data=hr2,
)
plt.xticks(rotation=70)
plt.title('Average monthly working hours of different departments')
plt.show()
# Add a nested classification variable through hue
sns.swarmplot(
    x='department',
    y='Average working hours per month (hours)',
    hue='5 Promotion within the year',
    data=hr2,
)
plt.xticks(rotation=30)
plt.title('Average monthly working hours of different departments')
plt.show()
5. Draw enhanced boxplot boxenplot()
# Draw an ordinary boxplot and an enhanced boxplot side by side
fig, axes = plt.subplots(1, 2, figsize=(8, 4))

# Round the number of rooms down to a whole number to get a categorical x
boston['Number of rooms(Rounding)'] = boston['Number of rooms'].map(math.floor)

# Arguments shared by both plots
box_args = dict(
    x='Number of rooms(Rounding)',
    y='House price (thousand dollars)',
    data=boston,
    orient='v',
)

# Ordinary boxplot on the left
axes[0].set_title('Ordinary Box Line Diagram')
sns.boxplot(ax=axes[0], **box_args)

# Enhanced boxplot on the right
axes[1].set_title('Enhanced boxplot')
sns.boxenplot(ax=axes[1], **box_args)

plt.show()
6. Draw classification grid combination diagram - pairplot()
Use the classification grid combination graph to draw the paired relationship in the dataset
# Draw a multivariate scatter chart of the Boston house prices
pair_columns = [
    'crime rate',
    'Nitric oxide content( ppm)',
    'Number of rooms',
    'Low income people',
    'House price (thousand dollars)',
]
sns.pairplot(boston[pair_columns])
plt.suptitle('Multivariate Scatter Chart', verticalalignment='bottom', y=1)
plt.show()
# Draw the pairwise scatter plots coloured by the categorical 'salary' column
selected_columns = ['Satisfaction', 'Total items', 'Length of service (years)', 'salary']
hr3 = sale[selected_columns]
sns.pairplot(hr3, hue='salary')
plt.suptitle('Multivariate classification scatter plot', verticalalignment='bottom')
plt.show()
4, Draw regression graph
1. Draw a linear regression fitting diagram - regplot()
import warnings

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Set a Chinese-capable font list together with the whitegrid theme
sns.set_style('whitegrid', {'font.sans-serif': ['simhei', 'Arial']})

# Ignore warnings
warnings.filterwarnings('ignore')

# Load data
boston = pd.read_csv('../data/boston_house_prices.csv', encoding='gbk')

# Draw the linear regression fit before and after changing the
# confidence interval parameter ci
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
axes[0].set_title('Linear regression fitting diagram before modification')
axes[1].set_title('Modified linear regression fitting diagram')

# Arguments shared by both fits
fit_args = dict(
    x='Number of rooms',
    y='House price (thousand dollars)',
    data=boston,
)
# Left: ci left at its default; right: ci set to 50
sns.regplot(ax=axes[0], **fit_args)
sns.regplot(ci=50, ax=axes[1], **fit_args)
plt.show()
2. Draw linear regression grid combination diagram - lmplot()
# Draw the regression grid of low-income population against house price,
# with one column of the grid per 'River crossing' category
sns.lmplot(
    x='Low income people',
    y='House price (thousand dollars)',
    col='River crossing',
    data=boston,
)
plt.show()
'.../data/boston_house_prices.csv', encoding='gbk')
# Draw the linear regression fit before and after changing the
# confidence interval parameter ci
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
axes[0].set_title('Linear regression fitting chart before modification ')
axes[1].set_title('Modified Linear Regression Fitting Chart ')
# Column names are spelled exactly as in the other sections of this
# tutorial; the lookup is case- and whitespace-sensitive.
sns.regplot(x='Number of rooms', y='House price (thousand dollars)', data=boston, ax=axes[0])
sns.regplot(x='Number of rooms', y='House price (thousand dollars)', data=boston, ci=50, ax=axes[1])
plt.show()
[External chain pictures are being transferred...(img-AgfMs08W-1664038679472)]

### 2. Draw linear regression grid combination diagram - lmplot()

```python
# Draw regression grid combination diagram of low-income population and housing price by river crossing behavior category
sns.lmplot(x='Low income people', y='House price (thousand dollars)', col='River crossing', data=boston)
plt.show()
```