一、Matplotlib简介

Matplotlib is a comprehensive library for creating static, animated, and interactive visualizations in Python. Matplotlib makes easy things easy and hard things possible.

二、使用Matplotlib生成图像

1、线形图

# 折线图
import numpy as np
import matplotlib.pyplot as plt

# Ten uniform random values drawn against their index as a connected polyline.
samples = np.random.random(10)
axes = plt.gca()
axes.plot(samples)
plt.show()

line_1.png

# 直线图
import numpy as np
import matplotlib.pyplot as plt

# Ten evenly spaced values from 1 to 10, plotted against their index, form a straight line.
ramp = np.linspace(1, 10, num=10)
plt.plot(ramp)
plt.show()

line_2.png

# 曲线图
import numpy as np
import matplotlib.pyplot as plt

# Plot y = x^2 sampled at ten evenly spaced points in [1, 10].
xs = np.linspace(1, 10, num=10)
ys = xs ** 2
plt.plot(xs, ys)
plt.show()

line_3.png

# 点状图
import numpy as np
import matplotlib.pyplot as plt

# Same parabola, drawn with the 'bo' format string (blue circle markers, no connecting line).
xs = np.linspace(1, 10, num=10)
marker_format = 'bo'
plt.plot(xs, xs ** 2, marker_format)
plt.show()

line_4.png

# 红点状图
import numpy as np
import matplotlib.pyplot as plt

# Same parabola, drawn with the 'r+' format string (red plus markers, no connecting line).
xs = np.linspace(1, 10, num=10)
marker_format = 'r+'
plt.plot(xs, xs ** 2, marker_format)
plt.show()

line_5.png

# 包含多条曲线的图
import numpy as np
import matplotlib.pyplot as plt

# Overlay three curves on the same axes; every plot() call adds one more line.
xs = np.linspace(1, 10, num=10)
for ys in (xs * 2, xs / 2, xs ** 2):
    plt.plot(xs, ys)
# A single call plt.plot(xs, xs * 2, xs, xs / 2, xs, xs ** 2) draws the same
# three curves, but is not recommended.
plt.show()

line_6.png

# 开启网格线绘制图形
import numpy as np
import matplotlib.pyplot as plt

# Sine and cosine over [-pi, pi) in steps of 0.01, with grid lines switched on.
theta = np.arange(-np.pi, np.pi, 0.01)
sine, cosine = np.sin(theta), np.cos(theta)
plt.plot(theta, sine, theta, cosine)
plt.grid(True)  # enable grid lines
plt.show()

line_7.png

2、图片样式及风格

网格线及子视图

# 创建子视图
import numpy as np
import matplotlib.pyplot as plt

# Three side-by-side subplots on one wide figure.
plt.figure(figsize=[15, 7])  # canvas size
x = np.arange(-20, 20, 0.1)  # shared sample points for the two sine panels

axes = plt.subplot(1, 3, 1)  # grid of 1 row x 3 columns, slot 1
axes.plot(x, np.sin(x))

axes2 = plt.subplot(1, 3, 2)  # slot 2
axes2.plot(x, np.sin(x))

axes3 = plt.subplot(1, 3, 3)  # slot 3
# arcsin is only defined on [-1, 1]; evaluating it over [-20, 20] produces NaN
# for almost every sample plus a RuntimeWarning, so sample its real domain.
x_unit = np.linspace(-1, 1, 200)
axes3.plot(x_unit, np.arcsin(x_unit))

plt.show()

line_8.png

# 修改网格线样式
import numpy as np
import matplotlib.pyplot as plt

# Three subplots, each with its own grid styling.
plt.figure(figsize=[15, 8])
x = np.arange(-20, 20, 0.1)  # shared sample points for the two sine panels

axes = plt.subplot(1, 3, 1)  # first subplot
axes.grid(color='r', linestyle='--', linewidth=2)  # grid colour, dash pattern and line width
axes.plot(x, np.sin(x))

axes2 = plt.subplot(1, 3, 2)  # second subplot
axes2.grid(color='g', linestyle='--')
axes2.plot(x, np.sin(x))

axes3 = plt.subplot(1, 3, 3)  # third subplot
# Style the grid of the third axes itself (not axes2 a second time).
axes3.grid(color='blue', linestyle='--')
# arcsin is only defined on [-1, 1]; sampling it over [-20, 20] yields NaN
# for almost every point plus a RuntimeWarning, so sample its real domain.
x_unit = np.linspace(-1, 1, 200)
axes3.plot(x_unit, np.arcsin(x_unit))

plt.show()

line_9.png

坐标轴界限

# 设置坐标轴界限
import numpy as np
import matplotlib.pyplot as plt

# Pin the visible range of both axes, then plot ten random values.
samples = np.random.random(10)
x_min, x_max, y_min, y_max = -5, 15, -5, 10
plt.axis([x_min, x_max, y_min, y_max])  # axis range as [xmin, xmax, ymin, ymax]
# xlim() and ylim() are equivalent to the call above:
# plt.xlim(-5, 15)
# plt.ylim(-5, 10)
# plt.axis('off') turns the axes off.
plt.plot(samples)
plt.show()

line_10.png

# 画个圆
import numpy as np
import matplotlib.pyplot as plt

# The points (sin t, cos t) lie on the unit circle; 'equal' gives both axes the same scale.
t = np.linspace(-5, 5, num=100)
circle_x, circle_y = np.sin(t), np.cos(t)
plt.plot(circle_x, circle_y)
plt.axis('equal')
plt.show()

line_11.png

坐标轴的标签

# 设置坐标轴的标题和标签
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import font_manager

# Plot f(x) = x^2 + 5 and decorate the figure with a title and axis labels.
xs = np.arange(0, 10, 2)
plt.plot(xs, xs ** 2 + 5)

# family= picks an installed system font (SimSun); fname= can point at a .ttf file instead.
title_font = font_manager.FontProperties(family='SimSun', size=20)
plt.title('坐标标签测试图', fontproperties=title_font)  # figure title
plt.xlabel('x', size=20)
plt.ylabel('f(x)=x^2+5', size=20, rotation=10)  # rotation is the label's rotation angle
plt.show()

line_12.png

图例

# 图例
import numpy as np
import matplotlib.pyplot as plt

# Attach a label to each line when plotting; legend() then builds the legend from them.
xs = np.arange(0, 10, 2)
labelled_curves = (('x', xs), ('2x', xs * 2), ('x^2', xs ** 2))
for curve_label, ys in labelled_curves:
    plt.plot(xs, ys, label=curve_label)
plt.legend()  # create the legend
plt.show()

line_13.png

| Location String | Location Code |
| --- | --- |
| 'best' (Axes only) | 0 |
| 'upper right' | 1 |
| 'upper left' | 2 |
| 'lower left' | 3 |
| 'lower right' | 4 |
| 'right' | 5 |
| 'center left' | 6 |
| 'center right' | 7 |
| 'lower center' | 8 |
| 'upper center' | 9 |
| 'center' | 10 |

import numpy as np
import matplotlib.pyplot as plt

# Plot three unlabelled lines, then name them and place the legend explicitly.
xs = np.arange(0, 10, 2)
for ys in (xs, xs * 2, xs ** 2):
    plt.plot(xs, ys)
# The label list is given in plotting order; loc takes one of the location
# strings from the table above.
plt.legend(['x', '2x', 'x^2'], loc='right')
plt.show()

line_14.png

线条样式

import numpy as np
import matplotlib.pyplot as plt

# Three cumulative sums of standard-normal samples, each drawn with its own
# colour, line style and marker.
walks = [np.random.randn(100) for _ in range(3)]
styles = (
    ('blue', '--', 'o'),
    ('green', '-', '>'),
    ('black', ':', 'D'),
)
for steps, (colour, dash, mark) in zip(walks, styles):
    plt.plot(steps.cumsum(), color=colour, linestyle=dash, marker=mark)

plt.legend(['x1', 'x2', 'x3'])
# Write the figure to disk; facecolor= can be passed to customise the background colour.
plt.savefig('./pic.jpg', dpi=150)
plt.show()

line_15.png

import numpy as np
import matplotlib.pyplot as plt

# A wide, semi-transparent sine curve on an axes with a pink background.
xs = np.arange(0, 10, 0.1)
line_options = dict(
    color='green',  # line colour
    alpha=0.3,  # transparency, between 0 and 1
    ls='-',  # line style, equivalent to linestyle
    lw=10,  # line width, equivalent to linewidth
)

axes = plt.subplot(facecolor='pink')
axes.plot(xs, np.sin(xs), **line_options)
plt.show()

line_16.png

坐标轴刻度

# 坐标刻度样式
import numpy as np
import matplotlib.pyplot as plt

# Replace the numeric tick marks of a random walk with custom text labels.
steps = np.random.randn(100)
plt.plot(steps.cumsum())

x_positions = np.linspace(1, 100, 7)  # seven tick positions, one per letter
plt.xticks(x_positions, list('ABCDEFG'), rotation=90)
y_positions = np.linspace(-10, 20, 3)  # three tick positions: -10, 5, 20
plt.yticks(y_positions, ['min', 0, 'max'], fontsize=15)
plt.show()

line_17.png

3、函数图像绘制

# 绘制sinx、cosx和sqrt(x)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Global font settings
config = {
    "mathtext.fontset": 'stix',
    "font.family": 'serif',
    "font.serif": ['Times New Roman'],  # font face
    "font.size": 22,
    'axes.unicode_minus': False  # minus-sign handling (the '-' character)
}
rcParams.update(config)
# Canvas size
plt.figure(figsize=(10, 6))
# Data
x = np.arange(0, 10, 0.2)
y1 = np.sin(x)
y2 = np.cos(x)
y3 = np.sqrt(x)
# Draw the three curves. Labels containing a LaTeX backslash command are raw
# strings so that '\s' is not parsed as an (invalid) Python escape sequence.
plt.plot(x, y1, color='darkseagreen', lw=1, ls='-', marker='.', label='$y=sinx$')
plt.plot(x, y2, color='dodgerblue', lw=1, ls='-', marker='*', label='$y=cosx$')
plt.plot(x, y3, color='orchid', lw=1, ls='-', marker='3', label=r'$y=\sqrt{x}$')
# Restrict the visible x and y ranges
plt.xlim((0, 10.5))
plt.ylim((-1.5, 4))
# Fetch the axes that already holds the curves and move its spines: hide the
# right/top ones and let the bottom/left ones cross at the data origin.
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.spines['bottom'].set_position(('data', 0))
ax.spines['left'].set_position(('data', 0))

x0 = 8
y0 = 2 * np.sqrt(2)
# Mark the point (8, 2*sqrt(2)) on the sqrt curve
plt.plot(x0, y0, marker='o', color='orchid')
# Arrow plus text pointing at that point
plt.annotate(r'$2\sqrt{2}$', xy=(x0, y0),
             textcoords='offset points', xytext=(+10, -30), fontsize=16,
             arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
# Text describing the x/y ranges; '\\in' keeps the LaTeX backslash while '\n'
# stays a real line break between the two formulas.
plt.annotate('$x\\in[0.0,10.0]$\n$y\\in[-1.0,4.0]$', xy=(3.8, 2), fontsize=16,
             textcoords='offset points', xytext=(+10, -30))

# Enable grid lines
plt.grid()
# Legend
plt.legend(numpoints=2, fontsize=14)
plt.title('the function figure of cos(), sin() and sqrt()')
# labelpad sets the distance between the x-axis label and the axis
plt.xlabel('the input value of x', labelpad=80)
# Rotation angle of the y-axis label
plt.ylabel('y=f(x)', rotation=90)
plt.show()

sinx_cosx_sqrt.png

4、其他图形样式

直方图

import numpy as np
import matplotlib.pyplot as plt

# Horizontal, density-normalised histogram of ten random integers drawn from [0, 10).
values = np.random.randint(0, 10, 10)
hist_options = {'density': True, 'orientation': 'horizontal', 'color': 'red'}
plt.hist(values, **hist_options)
plt.show()

hist_chart_demo.png

条形图

import numpy as np
import matplotlib.pyplot as plt

# Horizontal bar chart: five positions in [0, 5] with random integer lengths from [0, 20).
positions = np.linspace(0, 5, num=5)
lengths = np.random.randint(0, 20, size=5)
# plt.bar(positions, lengths) would draw the bars vertically instead.
plt.barh(positions, lengths)
plt.show()

barh_chart_demo.png

饼图

import numpy as np
import matplotlib.pyplot as plt

# Pie chart of five shares; autopct prints each wedge's percentage with two decimals.
p = np.array([0.4, 0.2, 0.15, 0.15, 0.1])
# Every wedge gets its own distinct label so the wedges can be told apart.
plt.pie(p, labels=['a', 'b', 'c', 'd', 'e'], autopct='%.2f%%')
plt.show()

pie_chart_demo.png

散点图

import numpy as np
import matplotlib.pyplot as plt

# Scatter plot of 1000 points whose x and y coordinates are standard-normal samples.
point_count = 1000
xs = np.random.randn(point_count)
ys = np.random.randn(point_count)
plt.scatter(xs, ys, color='r', s=18)  # s sets the marker size
plt.show()

scatter_chart_demo.png

箱型图

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Box plot of a 10 x 5 table of uniform random values.
random_table = np.random.rand(10, 5)
frame = pd.DataFrame(random_table)
plt.boxplot(frame)
plt.show()

boxplot_chart_demo.png

小提琴图

import numpy as np
import matplotlib.pyplot as plt

# Violin plot of 1000 normally distributed samples.
samples = np.random.normal(size=1000)
plt.violinplot(dataset=samples)
plt.show()

violinplot_chart_demo.png

三、根据数据集绘制图像

1、2000年以前温室气体浓度

# 根据csv绘制图像
import os
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import font_manager

# Data preparation: download the dataset once and cache it next to the script.
csv_path = './greenhouse_gases.csv'
if not os.path.exists(csv_path):
    url = 'https://vincentarelbundock.github.io/Rdatasets/csv/dslabs/greenhouse_gases.csv'
    response = requests.get(url, timeout=30)
    # Fail loudly instead of caching an HTTP error page as if it were the CSV.
    response.raise_for_status()
    # The context manager closes the file handle even if the write fails.
    with open(csv_path, 'wb') as csv_file:
        csv_file.write(response.content)

# Canvas size
plt.figure(figsize=(15, 10))
# Fonts
title_font = font_manager.FontProperties(family='SimHei', size=25)  # SimHei (a Hei-style typeface)
label_font = font_manager.FontProperties(family='SimHei', size=15)

# Read the greenhouse gas concentration data
gas_data = pd.read_csv(csv_path)

# Draw one line per gas. sort=False keeps the gases in order of first
# appearance in the file, which is also the order of the legend entries.
positions = np.array([])
year_list = []
for gas, gas_data_by_type in gas_data.groupby(by='gas', sort=False):
    # Year labels and concentration values of this gas
    year_list = ['{}年'.format(y) for y in gas_data_by_type['year']]
    concentration_list = gas_data_by_type['concentration']
    # x positions 1..n, one per sample
    positions = np.linspace(1, len(concentration_list), len(concentration_list))
    plt.plot(positions, concentration_list, label=gas)

# Label roughly every tenth sample with its year. Positions and labels are
# sliced with the same step so that their counts always match, whatever the
# number of samples; the labels come from the last gas plotted.
if year_list:
    step = max(1, len(year_list) // 10)
    plt.xticks(positions[::step], year_list[::step], fontsize=15, fontproperties=label_font)
plt.yticks(fontsize=15, fontproperties=label_font)

# Enable grid lines
plt.grid()
# Title
plt.title('2000年以前温室气体浓度趋势图', fontproperties=title_font)
# Create the legend and show the figure
plt.legend(fontsize=15)
plt.show()

greenhouse_gases_data_pic.png

2、2013年纽约起飞的所有航班准点率

数据含义描述

Flights Data Download Link

Flights data

Description

On-time data for all flights that departed NYC (i.e. JFK, LGA or EWR) in 2013.

Usage

flights

Format

Data frame with columns

year, month, day : Date of departure.

dep_time, arr_time : Actual departure and arrival times (format HHMM or HMM), local tz.

sched_dep_time, sched_arr_time : Scheduled departure and arrival times (format HHMM or HMM), local tz.

dep_delay, arr_delay : Departure and arrival delays, in minutes. Negative times represent early departures/arrivals.

carrier : Two letter carrier abbreviation. See airlines to get name.

flight : Flight number.

tailnum : Plane tail number. See planes for additional metadata.

origin, dest : Origin and destination. See airports for additional metadata.

air_time : Amount of time spent in the air, in minutes.

distance : Distance between airports, in miles.

hour, minute : Time of scheduled departure broken into hour and minutes.

time_hour : Scheduled date and hour of the flight as a POSIXct date. Along with origin, can be used to join flights data to weather data.

Source

RITA, Bureau of transportation statistics, https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=236

数据内容查看

import pandas as pd

# Load the flights on-time data.
flights_data = pd.read_csv('./flights.csv')
# Show every column instead of eliding some of them.
pd.set_option('display.max_columns', None)

# Basic overview of the data; each section ends with an 80-dash divider.
divider = '\n' + '-' * 80 + '\n'
print('top 5 data: \n', flights_data.head(5), end=divider)
# NOTE: info() prints its summary itself and returns None, so the summary
# shows up before the 'data info' header and the header is followed by 'None'.
info_result = flights_data.info()
print('data info : \n', info_result, end=divider)
print('data describe : \n', flights_data.describe())

''' 输出:
top 5 data: 
    Unnamed: 0  year  month  day  dep_time  sched_dep_time  dep_delay   
0           1  2013      1    1     517.0             515        2.0  \
1           2  2013      1    1     533.0             529        4.0   
2           3  2013      1    1     542.0             540        2.0   
3           4  2013      1    1     544.0             545       -1.0   
4           5  2013      1    1     554.0             600       -6.0   

   arr_time  sched_arr_time  arr_delay carrier  flight tailnum origin dest   
0     830.0             819       11.0      UA    1545  N14228    EWR  IAH  \
1     850.0             830       20.0      UA    1714  N24211    LGA  IAH   
2     923.0             850       33.0      AA    1141  N619AA    JFK  MIA   
3    1004.0            1022      -18.0      B6     725  N804JB    JFK  BQN   
4     812.0             837      -25.0      DL     461  N668DN    LGA  ATL   

   air_time  distance  hour  minute            time_hour  
0     227.0      1400     5      15  2013-01-01 05:00:00  
1     227.0      1416     5      29  2013-01-01 05:00:00  
2     160.0      1089     5      40  2013-01-01 05:00:00  
3     183.0      1576     5      45  2013-01-01 05:00:00  
4     116.0       762     6       0  2013-01-01 06:00:00  
--------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 336776 entries, 0 to 336775
Data columns (total 20 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   Unnamed: 0      336776 non-null  int64  
 1   year            336776 non-null  int64  
 2   month           336776 non-null  int64  
 3   day             336776 non-null  int64  
 4   dep_time        328521 non-null  float64
 5   sched_dep_time  336776 non-null  int64  
 6   dep_delay       328521 non-null  float64
 7   arr_time        328063 non-null  float64
 8   sched_arr_time  336776 non-null  int64  
 9   arr_delay       327346 non-null  float64
 10  carrier         336776 non-null  object 
 11  flight          336776 non-null  int64  
 12  tailnum         334264 non-null  object 
 13  origin          336776 non-null  object 
 14  dest            336776 non-null  object 
 15  air_time        327346 non-null  float64
 16  distance        336776 non-null  int64  
 17  hour            336776 non-null  int64  
 18  minute          336776 non-null  int64  
 19  time_hour       336776 non-null  object 
dtypes: float64(5), int64(10), object(5)
memory usage: 51.4+ MB
data info : 
 None
--------------------------------------------------------------------------------
data describe : 
           Unnamed: 0      year          month            day       dep_time   
count  336776.000000  336776.0  336776.000000  336776.000000  328521.000000  \
mean   168388.500000    2013.0       6.548510      15.710787    1349.109947   
std     97219.001466       0.0       3.414457       8.768607     488.281791   
min         1.000000    2013.0       1.000000       1.000000       1.000000   
25%     84194.750000    2013.0       4.000000       8.000000     907.000000   
50%    168388.500000    2013.0       7.000000      16.000000    1401.000000   
75%    252582.250000    2013.0      10.000000      23.000000    1744.000000   
max    336776.000000    2013.0      12.000000      31.000000    2400.000000   

       sched_dep_time      dep_delay       arr_time  sched_arr_time   
count   336776.000000  328521.000000  328063.000000   336776.000000  \
mean      1344.254840      12.639070    1502.054999     1536.380220   
std        467.335756      40.210061     533.264132      497.457142   
min        106.000000     -43.000000       1.000000        1.000000   
25%        906.000000      -5.000000    1104.000000     1124.000000   
50%       1359.000000      -2.000000    1535.000000     1556.000000   
75%       1729.000000      11.000000    1940.000000     1945.000000   
max       2359.000000    1301.000000    2400.000000     2359.000000   

           arr_delay         flight       air_time       distance   
count  327346.000000  336776.000000  327346.000000  336776.000000  \
mean        6.895377    1971.923620     150.686460    1039.912604   
std        44.633292    1632.471938      93.688305     733.233033   
min       -86.000000       1.000000      20.000000      17.000000   
25%       -17.000000     553.000000      82.000000     502.000000   
50%        -5.000000    1496.000000     129.000000     872.000000   
75%        14.000000    3465.000000     192.000000    1389.000000   
max      1272.000000    8500.000000     695.000000    4983.000000   

                hour         minute  
count  336776.000000  336776.000000  
mean       13.180247      26.230100  
std         4.661316      19.300846  
min         1.000000       0.000000  
25%         9.000000       8.000000  
50%        13.000000      29.000000  
75%        17.000000      44.000000  
max        23.000000      59.000000  
'''

机场航班数量(条形图)

adict.py:

# Chinese display names keyed by the two-character carrier code.
# The translations are best-effort; their accuracy is not guaranteed.
# Codes follow the carrier abbreviations of the flights data set
# (e.g. FL = AirTran Airways, AS = Alaska Airlines, US = US Airways,
# HA = Hawaiian Airlines, VX = Virgin America).
carrier_dict = {'UA': {'name_ch': '美国联合航空'}, 'AA': {'name_ch': '美国航空'}, 'B6': {'name_ch': '捷蓝航空'},
                'DL': {'name_ch': '达美航空'}, 'EV': {'name_ch': '美国快捷航空'}, 'MQ': {'name_ch': '特使航空'},
                'US': {'name_ch': '全美航空'}, 'WN': {'name_ch': '美国西南航空'}, 'VX': {'name_ch': '维珍美国航空'},
                'FL': {'name_ch': '穿越航空'}, 'AS': {'name_ch': '阿拉斯加航空'}, '9E': {'name_ch': '奋进航空'},
                'F9': {'name_ch': '边疆航空'}, 'HA': {'name_ch': '夏威夷航空'}, 'YV': {'name_ch': '梅萨航空公司'},
                'OO': {'name_ch': '天西航空'}}

# Chinese display names keyed by three-letter airport code.
# NOTE(review): presumably best-effort translations like the carrier names — confirm.
# Key order matters: the bar-chart script picks its destinations by slicing
# the keys of this dict in insertion order ([3:13]), so do not reorder entries.
airport_code = {'EWR': {'name_ch': '纽瓦克自由国际机场'}, 'LGA': {'name_ch': '拉瓜迪亚机场'},
                'JFK': {'name_ch': '纽约肯尼迪机场'}, 'IAH': {'name_ch': '乔治布什国际机场'},
                'MIA': {'name_ch': '迈阿密国际机场'}, 'BQN': {'name_ch': '拉斐尔-埃尔南德斯机场'},
                'ATL': {'name_ch': '亚特兰大国际机场'}, 'ORD': {'name_ch': '奥黑尔国际机场'},
                'FLL': {'name_ch': '劳德代尔机场'}, 'IAD': {'name_ch': '华盛顿杜勒斯国际机场'},
                'MCO': {'name_ch': '奥兰多国际机场'}, 'PBI': {'name_ch': '西棕榈滩国际机场'},
                'TPA': {'name_ch': '坦帕机场'}, 'LAX': {'name_ch': '洛杉矶机场'}, 'SFO': {'name_ch': '旧金山机场'},
                'DFW': {'name_ch': '达拉斯-沃斯堡国际机场'}, 'BOS': {'name_ch': '洛干国际机场'},
                'LAS': {'name_ch': '麦克卡兰国际机场'}, 'MSP': {'name_ch': '圣保罗国际机场'},
                'DTW': {'name_ch': '维纳郡机场'}, 'RSW': {'name_ch': '西南佛罗里达地区机场'},
                'SJU': {'name_ch': '路易斯姆诺兹马灵机场'}, 'PHX': {'name_ch': '凤凰城机场'},
                'BWI': {'name_ch': '巴尔的摩华盛顿国际机场'}, 'CLT': {'name_ch': '夏洛特-道格拉斯国际机场'},
                'BUF': {'name_ch': '布法罗尼亚加拉国际机场'}, 'DEN': {'name_ch': '丹佛国际机场'},
                'SNA': {'name_ch': '约翰维纳机场'}, 'MSY': {'name_ch': '新奥尔良路易斯阿姆斯特朗国际机场'},
                'SLC': {'name_ch': '盐湖城国际机场'}, 'XNA': {'name_ch': '阿肯色西北地区机场'},
                'MKE': {'name_ch': '米切尔国际机场'}, 'SEA': {'name_ch': '西雅图塔克马国际机场'},
                'ROC': {'name_ch': '罗切斯特机场'}, 'SYR': {'name_ch': '雪城汉考克国际机场'},
                'SRQ': {'name_ch': '布雷登顿国际机场'}, 'RDU': {'name_ch': '罗利机场'},
                'CMH': {'name_ch': '哥伦布国际机场'}, 'JAX': {'name_ch': '杰克逊威尔机场'},
                'CHS': {'name_ch': '查尔斯顿国际机场'}, 'MEM': {'name_ch': '孟菲斯机场'},
                'PIT': {'name_ch': '匹兹堡机场'}, 'SAN': {'name_ch': '圣迭戈国际机场'},
                'DCA': {'name_ch': '罗纳德·里根华盛顿国家机场'}, 'CLE': {'name_ch': '霍普金斯国际机场'},
                'STL': {'name_ch': '兰伯特圣路易斯国际机场'}, 'MYR': {'name_ch': '默特尔比奇国际机场'},
                'JAC': {'name_ch': '杰克逊霍勒机场'}, 'MDW': {'name_ch': '米德威机场'},
                'HNL': {'name_ch': '火奴鲁鲁国际机场'}, 'BNA': {'name_ch': '那什维尔国际机场'},
                'AUS': {'name_ch': '奥斯汀机场'}, 'BTV': {'name_ch': '柏林顿机场'}, 'PHL': {'name_ch': '费城国际机场'},
                'STT': {'name_ch': '圣托马斯机场'}, 'EGE': {'name_ch': '伊格尔郡机场'},
                'AVL': {'name_ch': '阿什维尔地区机场'}, 'PWM': {'name_ch': '波特兰国际喷气机机场'},
                'IND': {'name_ch': '印第安纳波利斯机场'}, 'SAV': {'name_ch': '希尔顿首脑机场'},
                'CAK': {'name_ch': '阿克伦坎通地区机场'}, 'HOU': {'name_ch': '霍比机场'},
                'LGB': {'name_ch': '长滩地方机场'}, 'DAY': {'name_ch': '詹姆斯考克斯代顿机场'},
                'ALB': {'name_ch': '奥尔巴尼国际机场'}, 'BDL': {'name_ch': '布拉德利国际机场'},
                'MHT': {'name_ch': '曼彻斯特地方机场'}, 'MSN': {'name_ch': '麦迪逊机场'},
                'GSO': {'name_ch': '格林斯伯勒机场'}, 'CVG': {'name_ch': '辛辛那提国际机场'},
                'BUR': {'name_ch': '伯班克机场'}, 'RIC': {'name_ch': '里士满国际机场'},
                'GSP': {'name_ch': '格林威尔机场'}, 'GRR': {'name_ch': '肯特郡国际机场'},
                'MCI': {'name_ch': '堪萨斯城国际机场'}, 'ORF': {'name_ch': '诺福克国际机场'},
                'SAT': {'name_ch': '圣安东尼奥机场'}, 'SDF': {'name_ch': '路易斯威尔机场'},
                'PDX': {'name_ch': '波特兰国际机场'}, 'SJC': {'name_ch': '圣何塞地方机场'},
                'OMA': {'name_ch': '奥马哈机场'}, 'CRW': {'name_ch': '查尔斯顿耶格机场'},
                'OAK': {'name_ch': '奥克兰大都会国际机场'}, 'SMF': {'name_ch': '萨克拉门托大都会机场'},
                'TUL': {'name_ch': '塔尔萨国际机场'}, 'TYS': {'name_ch': '麦克吉提森机场'},
                'OKC': {'name_ch': '维尔罗杰斯机场'}, 'PVD': {'name_ch': '西奥多·弗朗西斯·格林机场'},
                'DSM': {'name_ch': '得梅因机场'}, 'PSE': {'name_ch': '梅塞德塔国际机场'},
                'BHM': {'name_ch': '伯明翰-沙特尔斯沃思国际机场'}, 'CAE': {'name_ch': '哥伦比亚机场'},
                'HDN': {'name_ch': '亚姆帕瓦雷机场'}, 'BZN': {'name_ch': '加拉丁机场'},
                'MTJ': {'name_ch': '蒙特罗斯郡机场'}, 'EYW': {'name_ch': '基韦斯特国际机场'},
                'PSP': {'name_ch': '帕姆斯普林斯地方机场'}, 'ACK': {'name_ch': '南塔克特纪念机场'},
                'BGR': {'name_ch': '班戈国际机场'}, 'ABQ': {'name_ch': '阿尔伯克基国际机场'},
                'ILM': {'name_ch': '威尔明顿国际机场'}, 'MVY': {'name_ch': '马撒葡萄园岛机场'},
                'SBN': {'name_ch': '南本德地区机场'}, 'LEX': {'name_ch': '布鲁格拉斯机场'},
                'CHO': {'name_ch': '夏洛茨维尔机场'}, 'TVC': {'name_ch': '特拉佛斯城机场'},
                'ANC': {'name_ch': '安克雷奇国际机场'}}
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams

import adict

# Canvas size
plt.figure(figsize=(15, 10))
# Global font settings
config = {
    "mathtext.fontset": 'stix',
    "font.family": 'serif',
    "font.serif": ['KaiTi'],  # KaiTi (a Kai-style typeface)
    "font.size": 20,
    'axes.unicode_minus': False  # minus-sign handling (the '-' character)
}
rcParams.update(config)
# Adjust the figure margins (extra room at the bottom for the rotated tick labels)
plt.subplots_adjust(top=0.9, bottom=0.28)

# Read the flights on-time data
flights_data = pd.read_csv('./flights.csv')
flights_data.set_index('Unnamed: 0', inplace=True)

# Drop every row that has a missing value in any column.
# To filter on specific columns only, use e.g.
# flights_data = flights_data[flights_data.dep_time.notnull()]
flights_data = flights_data.dropna()

origin_arr = flights_data['origin'].unique()
# Destinations to show: a fixed slice of the airport table's keys
dest_arr = list(adict.airport_code)[3:13]

# Number of flights per (origin, destination) pair as an origin x destination
# table. size() counts rows directly, so no helper 'count' column is needed
# (a 0-based helper Series would not line up with this frame's index and
# would silently lose a row). Pairs without any flight are filled with 0.
route_counts = (
    flights_data[flights_data['dest'].isin(dest_arr)]
    .groupby(['origin', 'dest'])
    .size()
    .unstack(fill_value=0)
    .reindex(index=origin_arr, columns=dest_arr, fill_value=0)
)

# One group of bars per destination, one bar per origin inside each group.
# The group is centred on the tick and takes 0.75 of the tick spacing.
group_size = max(len(origin_arr), 1)
bar_width = 0.75 / group_size
tick_positions = np.arange(len(dest_arr))
for position, ori in enumerate(origin_arr):
    deviation = (position - (group_size - 1) / 2) * bar_width
    plt.bar(tick_positions + deviation, route_counts.loc[ori].to_numpy(), bar_width,
            label=adict.airport_code[ori]['name_ch'])

plt.xticks(tick_positions,
           [adict.airport_code[des]['name_ch'] for des in dest_arr],
           rotation=55, ha='right')
plt.title('2013年纽约三大机场飞往各机场的航班数量图', fontsize=25)
plt.legend()
plt.show()

airpots_bar_char.png

各航司航班数量占比(饼图)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams

import adict

# Canvas size
plt.figure(figsize=(15, 10))
# Global font settings
config = {
    "mathtext.fontset": 'stix',
    "font.family": 'serif',
    "font.serif": ['KaiTi'],  # KaiTi (a Kai-style typeface)
    "font.size": 30,
    'axes.unicode_minus': False  # minus-sign handling (the '-' character)
}
rcParams.update(config)
# Adjust the figure margins
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.02)

# Read the flights on-time data
flights_data = pd.read_csv('./flights.csv')
flights_data.set_index('Unnamed: 0', inplace=True)
# Drop every row that has a missing value in any column
flights_data = flights_data.dropna()

# Flights per carrier, in order of first appearance in the data. size()
# counts rows directly, so no helper 'count' column is needed (a 0-based
# helper Series would not line up with this frame's index and would
# silently lose a row).
carrier_counts = flights_data.groupby('carrier', sort=False).size()

# Carriers below 3% of all flights are merged into a single "other" wedge
share_pct = 100 * carrier_counts / carrier_counts.sum()
is_major = share_pct >= 3
major_counts = carrier_counts[is_major]
other_sum = carrier_counts[~is_major].sum()

data_list_new = major_counts.tolist()
# Fall back to the raw carrier code when no Chinese name is known
carrier_ch_list_new = [adict.carrier_dict.get(code, {}).get('name_ch', code)
                       for code in major_counts.index]
if other_sum > 0:  # skip the "other" wedge when nothing was merged into it
    data_list_new.append(other_sum)
    carrier_ch_list_new.append('其他航空')

# Draw the pie chart; every wedge is pulled out by the same small offset
patches, l_text, p_text = plt.pie(np.array(data_list_new), labels=carrier_ch_list_new,
                                  autopct='%.2f%%', explode=[0.05] * len(data_list_new))
for t in p_text:
    t.set_size(20)  # font size of the percentage texts
plt.title('2013年各航司从纽约起飞的航班数占比图', fontsize=35)
plt.show()

airpots_pie_chart.png