Basic Drawing Operations of Seaborn Library

Basic Drawing Operations of Seaborn Library

#seaborn library visualization template

# Import libraries
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

# Use the whitegrid theme and put SimHei (a Chinese font) first in the
# sans-serif font list, followed by Arial.
sns.set_style('whitegrid', {'font.sans-serif':['simhei', 'Arial']})

# Ignore all warnings for the rest of the session.
# NOTE: this also hides deprecation warnings from the plotting libraries.
import warnings
warnings.filterwarnings('ignore')

# Load the data set.
# NOTE(review): '.../data.csv' (three dots) looks like a placeholder path;
# replace it with the real file location before running.
data = pd.read_csv('.../data.csv')

1, seaborn's drawing style

1. Theme Style

There are five preset themes in the seaborn library: darkgrid (gray background + white grid), whitegrid (white background + black grid), dark (gray background only), white (white background only), and ticks (axes with tick marks).

Personally recommend whitegrid or white

The darkgrid and whitegrid themes make it easier to read quantitative information off a plot

The dark and white themes help prevent confusion between grids and lines representing data

ticks theme helps to reflect a small number of special data element structures

set_style() modifies the theme. The default theme is darkgrid

sns.set_style( style = None , rc = None)

The set_style function can only modify the parameters that the axes_style function displays. The axes_style function, used in a with statement, sets the figure style temporarily.

# Sample data: x is 1, 3, 5, 7, 9 and the three y series are x shifted
# up by 1, 3 and 5 respectively.
x = np.arange(1, 10, 2)
y1, y2, y3 = (x + offset for offset in (1, 3, 5))
def showLine(flip=1):
    """Draw the three sample lines y1, y2 and y3 against x.

    Reads the module-level arrays ``x``, ``y1``, ``y2`` and ``y3``.

    Args:
        flip: Unused; kept so that existing calls keep working.
    """
    # x and y are passed by keyword: seaborn 0.12+ no longer accepts them
    # positionally, while older releases accept the keyword form as well.
    for y in (y1, y2, y3):
        sns.lineplot(x=x, y=y)
# Draw the sample lines once per preset theme on a 2x3 grid. axes_style()
# is used as a context manager, so each theme only applies to the subplot
# created inside its ``with`` block. Each panel is titled with its theme name.
pic = plt.figure(figsize=(12, 8))
for slot, theme in enumerate(('darkgrid', 'whitegrid', 'dark', 'white', 'ticks'), start=1):
    with sns.axes_style(theme):
        pic.add_subplot(2, 3, slot)
        showLine()
        plt.title(theme)

# Sixth panel: set_style() is called outside any ``with`` block, so the
# modified darkgrid theme (custom font list, black grid) stays active afterwards.
sns.set_style(
    style='darkgrid',
    rc={'font.sans-serif': ['MicrosoftYaHei', 'SimHei'], 'grid.color': 'black'},
)
pic.add_subplot(2, 3, 6)
showLine()
plt.title('modify parameters')
plt.show()

2. Element Scaling

set_context() sets the scale of the elements in the output plot; its context parameter is None by default

The context parameter can accept paper, notebook, talk, and poster types

paper < notebook < talk < poster

The set_context function can only modify the parameters that the plotting_context function displays. The plotting_context function changes the size of labels, lines and other elements in the plot by adjusting parameters, but does not affect the overall style

sns.set_context( context = None , font_scale = 1, rc = None )

# Restore seaborn's default parameters before comparing contexts.
sns.set()

# Sample data: x is 1, 3, 5, 7, 9 and the three y series are x shifted
# up by 1, 3 and 5 respectively.
x = np.arange(1, 10, 2)
y1, y2, y3 = (x + offset for offset in (1, 3, 5))
def showLine(flip=1):
    """Draw the three sample lines y1, y2 and y3 against x.

    Reads the module-level arrays ``x``, ``y1``, ``y2`` and ``y3``.

    Args:
        flip: Unused; kept so that existing calls keep working.
    """
    # x and y are passed by keyword: seaborn 0.12+ no longer accepts them
    # positionally, while older releases accept the keyword form as well.
    for y in (y1, y2, y3):
        sns.lineplot(x=x, y=y)
# Compare the four plotting contexts (paper < notebook < talk < poster) on a
# 2x2 grid. plotting_context() is used as a context manager, so each scale
# only applies to the subplot created inside its ``with`` block.
# Exactly one figure is created here: creating a second, unused figure would
# make plt.show() display an additional blank figure.
pic = plt.figure(figsize=(8, 8), dpi=100)
for slot, context in enumerate(('paper', 'notebook', 'talk', 'poster'), start=1):
    with sns.plotting_context(context):
        pic.add_subplot(2, 2, slot)
        showLine()
        plt.title(context)  # each panel is titled with its context name
plt.show()

3. Border control

despine() can remove the border at any position, adjust the position of the border, and trim the length of the border

sns.despine( fig = None , ax = None , top = True , right = True , left = False , bottom = False , offset = None , trim = False )

Parameter | Description
top | Accepts a boolean; whether to remove the top border. Default is True
right | Accepts a boolean; whether to remove the right border. Default is True
left | Accepts a boolean; whether to remove the left border. Default is False
bottom | Accepts a boolean; whether to remove the bottom border. Default is False
offset | Accepts an int or dict; the distance by which the borders are moved away from the axes. Default is None
trim | Accepts a boolean; if True, the borders are limited to the smallest and largest major ticks on each non-despined axis. Default is False
# despine() without arguments.
# The white theme is applied only inside the ``with`` block.
with sns.axes_style('white'):
    showLine()
    sns.despine()  # With the default arguments the top and right borders are removed
    plt.title('Control drawing borders')
plt.show()

# Move the borders away from the axes by an offset of 10.
with sns.axes_style('white'):
    # Random normal sample with 20 rows and 6 columns; column k is shifted by k / 2.
    # NOTE: this rebinds the module-level name `data`.
    data = np.random.normal(size=(20, 6)) + np.arange(6) / 2
    sns.boxplot(data=data)
    # left=False and bottom=False keep the left and bottom borders.
    sns.despine(offset=10, left=False, bottom=False)
    plt.title('Control drawing borders')
plt.show()

2, Draw a diagram

1. Scatterplot - sns.scatterplot()

# Read the HR data set (the CSV file is GBK-encoded).
hr = pd.read_csv('../data/hr.csv', encoding='gbk')

# Select the rows whose department is the Product Development Department
# and whose `quit` flag equals 1.
in_product_dev = hr['department'].values == 'Product Development Department'
has_quit = hr['quit'].values == 1
product = hr.iloc[in_product_dev & has_quit, :]

# Plain scatter plot of evaluation score against average monthly working hours.
ax = sns.scatterplot(
    data=product,
    x='score',
    y='Average working hours per month (hours)',
)
plt.title('Scatter Chart of Evaluation Score and Average Working Time')
plt.show()

# Same scatter plot, with both the colour (hue) and the marker shape (style)
# keyed on the salary column; `markers` assigns one marker per salary value.
markers = {'low': 'o', 'in': 'D', 'high': 's'}
sns.scatterplot(
    data=product,
    x='score',
    y='Average working hours per month (hours)',
    hue='salary',
    style='salary',
    markers=markers,
)
plt.title('Scatter Chart of Evaluation Score and Average Working Time')
plt.show()

2. Line Chart - sns.lineplot()

Main parameters:

estimator: accepts the name of a pandas method, a callable, or None. Specifies how the y values that share the same x value are aggregated. The default is mean

ci: accepts an int, 'sd', or None. Specifies the size of the confidence interval drawn around the value aggregated by the estimator parameter; 'sd' draws the standard deviation of the data instead. Default is 95

# Line chart of house price against number of rooms (Boston housing data,
# read from a GBK-encoded CSV file).
boston = pd.read_csv('../data/boston_house_prices.csv', encoding='gbk')
sns.lineplot(x='Number of rooms', y='House price (thousand dollars)', data=boston, ci=0)
plt.title('Number of rooms and house price')
plt.show()

# Line chart of evaluation score against length of service for the rows of
# `hr` whose department is 'IT Department', with one line per `quit` value.
IT = hr.iloc[hr['department'].values=='IT Department', :]
sns.lineplot(x='Length of service (years)', y='score', hue='quit', data=IT, ci=0)
# The title contains an apostrophe, so it must be double-quoted; inside a
# single-quoted literal the apostrophe ends the string and is a SyntaxError.
plt.title("Length of service and previous year's evaluation")
plt.show()

3. Heat map - heatmap()

# Set to display the minus sign normally
plt.rcParams['axes.unicode_minus'] = False
#plt.figure(figsize=(16, 12))
corr = boston.corr()  # Correlation coefficient matrix of the columns of `boston`
sns.heatmap(corr)
plt.title('Characteristic matrix thermodynamic diagram')
plt.show()

# Same heat map on a 10x10 figure, with every cell annotated by its value
# formatted to two decimal places
plt.figure(figsize=(10, 10))
sns.heatmap(corr, annot=True, fmt='.2f')
plt.title('Characteristic matrix thermodynamic diagram')
plt.show()

4. Matrix Grid - PairGrid()

PairGrid() can be used to draw a grid graph of the degree of data correlation

PairGrid() maps each variable in the dataset to a row and a column of a grid of subplots. Different plotting functions can be used for the upper and lower triangles to draw bivariate plots that show the relationship between each pair of variables in the dataset.

# Draw a correlation grid between crime rate, nitric oxide content, number of rooms and house price
g = sns.PairGrid(boston, vars=['crime rate', 'Nitric oxide content( ppm)', 'Number of rooms', 'House price (thousand dollars)'])
g = g.map(plt.scatter)  # Draw a scatter plot in every subplot of the grid
plt.suptitle('Matrix Grid', verticalalignment='bottom' , y=1)
plt.show()

# Draw subsets of data with different colors.
# `sell` holds the rows of `hr` whose department is 'Sales Department' and whose `quit` flag is 1.
sell = hr.iloc[(hr['department'].values=='Sales Department') & (hr['quit'].values==1), :]
g = sns.PairGrid(sell,
                 vars=['Satisfaction', 'score', 'Average working hours per month (hours)'],
                 hue='salary', palette='Set3')
g = g.map_diag(sns.kdeplot)  # Diagonal subplots: kdeplot of each variable
g = g.map_offdiag(plt.scatter)  # Off-diagonal subplots: scatter plots
plt.suptitle('Matrix grid with different colors', verticalalignment='bottom' , y=1)
plt.show()

5. Relational Grid Composition Diagram - relplot()

relplot() can uniformly access the scatterplot function and lineplot function to draw the relational grid composite graph

# Single scatter plot of satisfaction against score for `sell`, coloured by salary.
sns.relplot(x='Satisfaction', y='score', hue='salary',
            data=sell)
# The title contains an apostrophe, so it must be double-quoted; inside a
# single-quoted literal the apostrophe ends the string and is a SyntaxError.
plt.title("Satisfaction level and last year's evaluation")
plt.show()

# Grid of scatter plots for `IT`: one row per salary value and one column per
# 'Work accident' value, coloured by '5 Promotion within the year'.
sns.relplot(x='Satisfaction', y='score', hue='5 Promotion within the year', row='salary',
            col='Work accident', data=IT)
plt.show()

# One panel per 'Work accident' value; col_wrap=1 wraps the columns after
# every panel, so the panels are stacked vertically.
sns.relplot(x='Satisfaction', y='score', hue='5 Promotion within the year', col='Work accident',
            col_wrap=1, data=IT)
plt.show()

3, Draw classification map

1. Bar Chart - barplot()

from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
import math

# Load Data (both CSV files are read with the GBK encoding).
# NOTE(review): these are absolute paths on the author's machine, and the
# directory name contains a trailing space ('... data /Chapter 4'); other
# sections of this file read the same files from '../data/'. Adjust before running.
boston = pd.read_csv('E:/desktop/source code+experimental data /Chapter 4/data/boston_house_prices.csv', encoding='gbk')
hr = pd.read_csv('E:/desktop/source code+experimental data /Chapter 4/data/hr.csv', encoding='gbk')

# Drawing with seaborn library: whitegrid theme, SimHei then Arial as sans-serif fonts
sns.set_style('whitegrid', {'font.sans-serif':['simhei', 'Arial']})
# Set Chinese font (matplotlib-level setting, SimHei only)
plt.rcParams['font.sans-serif'] = ['SimHei']
# Set to display minus sign normally
plt.rcParams['axes.unicode_minus']=False

# Bar chart of the total number of people in each department.
count = hr['department'].value_counts()
index = count.index
# Departments go on the x axis and head counts on the y axis, so that the
# rotated x tick labels and the axis labels below describe the correct axes.
# The counts are passed as a plain array so they pair with `index` by position.
sns.barplot(x=index, y=count.values)
plt.xticks(rotation=70)  # rotate the department names so they do not overlap
plt.xlabel('department')
plt.ylabel('total')
plt.title('Comparison of the number of people in each department')
plt.show()

2. Count Graph - countplot()

# Draw count charts with the categories on the x axis and on the y axis
plt.figure(figsize=(8, 4))
# The canvas is divided into one row and two columns; this is the first subplot
plt.subplot(121)
sns.countplot(x='Length of service (years)', data=hr)
plt.title('x Counting chart of axis display data')
plt.ylabel('count')
# The canvas is divided into one row and two columns; this is the second subplot
plt.subplot(122)
sns.countplot(y='Length of service (years)', data=hr)
plt.title('y Counting chart of axis display data')
plt.xlabel('count')
plt.show()

# Draw a count chart with nested categories: one group per
# '5 Promotion within the year' value, split by salary
sns.countplot(x='5 Promotion within the year', hue='salary', data=hr, palette='Set2')
# NOTE(review): the title says 'Scatter Chart' although this is a count plot — confirm the intended title.
plt.suptitle('Multivariate Scatter Chart')
plt.ylabel('total')
plt.show()

3. Draw univariate distribution (histogram) - distplot()

# Draw the univariate distribution of the 'property tax' column;
# kde=False is passed so that no kernel density estimate is requested.
# NOTE(review): distplot() is deprecated in recent seaborn releases in favour
# of histplot()/displot() — check the installed version before relying on it.
sns.distplot(boston['property tax'], kde=False)
plt.title('Distribution map of single variable')
plt.ylabel('quantity')
plt.show()

4. Draw classification scatter plots - stripplot(), swarmplot()

(1)stripplot( )

stripplot() accepts data in several forms, including lists, NumPy arrays, DataFrames, Series, arrays, or vectors.

Main parameters:

jitter: it means adding uniform random noise (only changing the graph) to optimize the graph display. The default is True.

# Draw a simple horizontal strip plot of one variable.
# `sale` holds the rows of `hr` whose department is 'Sales Department' and whose `quit` flag is 1.
sale = hr.iloc[(hr['department'].values=='Sales Department') & (hr['quit'].values==1), :]
sns.stripplot(x=sale['Average working hours per month (hours)'])
plt.title('Simple Horizontal Scatter Chart')
plt.show()

# Compare strip plots with and without random jitter, side by side.
# `hr1` holds the rows of `hr` whose `quit` flag is 1.
hr1 = hr.iloc[hr['quit'].values==1, :]
plt.figure(figsize=(10, 5))
plt.subplot(121)  # left subplot
plt.xticks(rotation=70)
sns.stripplot(x='department', y='Average working hours per month (hours)', data=hr1)  # Random jitter is added by default
plt.title('Default random noise jitter')
plt.subplot(122)  # right subplot
plt.xticks(rotation=70)
sns.stripplot(x='department', y='Average working hours per month (hours)',
                 data=hr1, jitter=False)  # No random jitter added
plt.title('No random noise jitter')
plt.show()

# Show a second classification variable through colour (hue='department').
# `hr2` holds the rows of `hr` whose salary is 'high' and whose `quit` flag is 0.
hr2 = hr.iloc[(hr['salary'].values=='high') & (hr['quit'].values==0), :]
sns.stripplot(x='5 Promotion within the year', y='Average working hours per month (hours)',
              hue='department', data=hr2, jitter=True)
plt.title('Promotion in the first five years and average monthly working hours')
plt.show()

# Compare the same hue-coloured strip plot without (top) and with (bottom) dodge=True.
# NOTE(review): figsize=(70, 13) requests a 70-inch-wide figure; this looks like
# a typo for a much smaller width — confirm the intended size.
plt.figure(figsize=(70, 13))
plt.subplot(211)  # top subplot
plt.xticks(rotation=70)
plt.title('Average monthly working hours of different departments')
sns.stripplot(x='department', y='Average working hours per month (hours)', hue='5 Promotion within the year', data=hr2)
plt.subplot(212)  # bottom subplot
plt.xticks(rotation=70)
sns.stripplot(x='department', y='Average working hours per month (hours)', hue='5 Promotion within the year', 
              data=hr2, dodge=True)
plt.show()

(2)swarmplot( )

After adding random noise to the stripplot function to increase the pattern jitter and drawing variables along the classification axis, there is still the possibility of overlap.

This can be avoided by using the swarmplot function, which can draw a classified scatter plot with non overlapping points.

# Draw a simple swarm plot of monthly working hours per department for `hr2`
sns.swarmplot(x='department', y='Average working hours per month (hours)', data=hr2)
plt.xticks(rotation=70)
plt.title('Average monthly working hours of different departments')
plt.show()

# Add a nested classification variable through colour (hue)
sns.swarmplot(x='department', y='Average working hours per month (hours)',
                 hue='5 Promotion within the year', data=hr2)
plt.xticks(rotation=30)
plt.title('Average monthly working hours of different departments')
plt.show()

5. Draw enhanced boxplot boxenplot()

# Draw an ordinary box plot (left) and an enhanced box plot (right) of the same data
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
axes[0].set_title('Ordinary Box Line Diagram')
# Add a column to `boston` holding the number of rooms rounded down with math.floor;
# it is used as the grouping variable on the x axis of both plots.
boston['Number of rooms(Rounding)'] = boston['Number of rooms'].map(math.floor)
sns.boxplot(x='Number of rooms(Rounding)', y='House price (thousand dollars)', 
            data=boston, orient='v', ax=axes[0])  # ordinary box plot on the left axes
axes[1].set_title('Enhanced boxplot')
sns.boxenplot(x='Number of rooms(Rounding)', y='House price (thousand dollars)', 
              data=boston, orient='v', ax=axes[1])  # enhanced box plot on the right axes
plt.show()

6. Draw classification grid combination diagram - pairplot()

Use the classification grid combination graph to draw the paired relationship in the dataset

# Draw pairwise plots for five columns of the Boston house price data
sns.pairplot(boston[['crime rate', 'Nitric oxide content( ppm)', 'Number of rooms', 'Low income people', 'House price (thousand dollars)']])
plt.suptitle('Multivariate Scatter Chart', verticalalignment='bottom', y=1)
plt.show()

# Draw pairwise plots for selected columns of `sale`, coloured by salary
hr3 = sale[['Satisfaction', 'Total items', 'Length of service (years)', 'salary']]
sns.pairplot(hr3, hue='salary')
# NOTE(review): unlike the suptitle call above, this one does not pass y=1 — confirm the title position is as intended.
plt.suptitle('Multivariate classification scatter plot', verticalalignment='bottom')
plt.show()

4, Draw regression graph

1. Draw a linear regression fitting diagram - regplot()

from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns

# Set Chinese font: whitegrid theme with SimHei then Arial as sans-serif fonts
sns.set_style('whitegrid', {'font.sans-serif':['simhei', 'Arial']})

# Ignore all warnings for the rest of the session
import warnings
warnings.filterwarnings('ignore')

# Load Data (GBK-encoded CSV)
boston = pd.read_csv('../data/boston_house_prices.csv', encoding='gbk')

# Draw the same linear regression fit twice, side by side: with the default
# confidence interval (left) and with ci=50 (right)
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
axes[0].set_title('Linear regression fitting diagram before modification')
axes[1].set_title('Modified linear regression fitting diagram')
sns.regplot(x='Number of rooms', y='House price (thousand dollars)', data=boston, ax=axes[0])
sns.regplot(x='Number of rooms', y='House price (thousand dollars)', data=boston, ci=50, ax=axes[1])
plt.show()

2. Draw linear regression grid combination diagram - lmplot()

# Draw a regression grid of house price against low-income population,
# with one column of plots per value of the 'River crossing' column
sns.lmplot(x='Low income people', y='House price (thousand dollars)', col='River crossing', data=boston)
plt.show()

# Load Data (GBK-encoded CSV). The statement is written out in full; a bare
# "'...csv', encoding='gbk')" fragment is not valid Python.
boston = pd.read_csv('../data/boston_house_prices.csv', encoding='gbk')

# Draw the same linear regression fit twice, side by side: with the default
# confidence interval (left) and with ci=50 (right).
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
axes[0].set_title('Linear regression fitting chart before modification ')
axes[1].set_title('Modified Linear Regression Fitting Chart ')
# Column names match the spelling used for this data set elsewhere in the file.
sns.regplot(x='Number of rooms', y='House price (thousand dollars)', data=boston, ax=axes[0])
sns.regplot(x='Number of rooms', y='House price (thousand dollars)', data=boston, ci=50, ax=axes[1])
plt.show()

[External chain pictures are being transferred...(img-AgfMs08W-1664038679472)]

### 2. Draw linear regression grid combination diagram - lmplot()

```python
# Draw a regression grid of house price against low-income population,
# with one column of plots per value of the 'River crossing' column
sns.lmplot(x='Low income people', y='House price (thousand dollars)', col='River crossing', data=boston)
plt.show()

```

Tags: Python

Posted by Alex C on Sat, 24 Sep 2022 22:08:59 +0530