• 技术文章 >Python爬虫 >爬虫进阶

    数据可视化matplotlib

    流芳流芳2020-05-26 15:12:11转载4366
    Python9.jpg

    1

    2

    3

    4

    5

    6

    7

    8

    9

    10

    import matplotlib.pyplot as plt

    import numpy as np

    import numpy.random as randn

    import pandas as pd

    from pandas import Series,DataFrame

    from pylab import mpl

    mpl.rcParams['axes.unicode_minus'] = False # 我自己配置的问题

    plt.rc('figure', figsize=(10, 6)) # 设置图像大小

     

    %matplotlib inline

    1. figure对象

    Matplotlib的图像均位于figure对象中。

    1

    fig = plt.figure()

    2. subplot子图

    1

    2

    3

    4

    ax1 = fig.add_subplot(2,2,1)

    ax2 = fig.add_subplot(2,2,2)

    ax3 = fig.add_subplot(2,2,3)

    ax4 = fig.add_subplot(2,2,4)

    1

    2

    3

    4

    5

    6

    random_arr = randn.rand(50)

    # 默认是在最后一次使用subplot的位置上作图

    plt.plot(random_arr,'ro--') # r:表示颜色为红色,o:表示数据用o标记 ,--:表示虚线

    # 等价于:

    # plt.plot(random_arr,linestyle='--',color='r',marker='o')

    plt.show()

    09.jpg

    1

    2

    3

    # hist:直方图:统计分布情况

    plt.hist(np.random.rand(8), bins=6, color='b', alpha=0.3)

    # bins:数据箱子个数

    1

    2

    3

    4

    (array([ 3.,  0.,  0.,  0.,  2.,  3.]),

     array([ 0.10261627,  0.19557319,  0.28853011,  0.38148703,  0.47444396,

             0.56740088,  0.6603578 ]),

     <a list of 6 Patch objects>)

    04.jpg

    1

    2

    # 散点图

    plt.scatter(np.arange(30), np.arange(30) + 3 * randn.randn(30))

    15.jpg

    1

    2

    3

    4

    5

    6

    7

    8

    9

    # 柱状图

    fig, ax = plt.subplots()

    x = np.arange(5)

    y1, y2 = np.random.randint(1, 25, size=(2, 5))

    width = 0.25

    ax.bar(x, y1, width, color='r') # 画柱子

    ax.bar(x+width, y2, width, color='g') # 画柱子

    ax.set_xticks(x+width)

    ax.set_xticklabels(['a', 'b', 'c', 'd', 'e']) # 下标注明

    15.jpg

    1

    fig, axes = plt.subplots(2, 2, sharex=True, sharey=True) # 共享轴坐标

    51.jpg

    1

    plt.subplots_adjust(left=0.5,top=0.5)

    1

    fig, axes = plt.subplots(2, 2)

    53.jpg

    1

    2

    3

    4

    5

    6

    7

    8

    9

    10

    11

    12

    13

    random_arr = randn.randn(8)

    fig, axes = plt.subplots(2, 2)

    axes[0, 0].hist(random_arr, bins=16, color='k', alpha=0.5)

    axes[0, 1].plot(random_arr,'ko--')

    x = np.arange(8)

    y = x + 5 * np.random.rand(8)

    axes[1,0].scatter(x, y)

    x = np.arange(5)

    y1, y2 = np.random.randint(1, 25, size=(2, 5))

    width = 0.25

    axes[1,1].bar(x, y1, width, color='r') # 画柱子

    axes[1,1].bar(x+width, y2, width, color='g') # 画柱子

    axes[1,1].set_xticks(x+width)

    axes[1,1].set_xticklabels(['a', 'b', 'c', 'd', 'e']) # 下标注明

    03.jpg

    1

    random_arr1 = randn.randn(8)

    1

    random_arr2 = randn.randn(8)

    1

    2

    3

    4

    fig, ax = plt.subplots()

    ax.plot(random_arr1,'ko--',label='A')

    ax.plot(random_arr2,'b^--',label='B')

    plt.legend(loc='best') # 自动选择放置图例的最佳位置

    17.jpg

    1

    2

    3

    4

    5

    6

    7

    8

    9

    10

    11

    12

    13

    14

    15

    16

    17

    18

    fig, ax = plt.subplots(1)

    ax.plot(np.random.randn(380).cumsum())

     

    # 设置刻度范围

    ax.set_xlim([0, 500])

     

    # 设置显示的刻度(记号)

    ax.set_xticks(range(0,500,100))

     

    # 设置刻度标签

    ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],

    rotation=30, fontsize='small')

     

    # 设置坐标轴标签

    ax.set_xlabel('X:...')

    ax.set_ylabel('Y:...')

     

    # 设置标题

    ax.set_title('Example')

    44.jpg

    3. Plotting functions in pandas

    1

    2

    3

    4

    5

    plt.close('all')

    s = Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))

    s

    fig,ax = plt.subplots(1)

    s.plot(ax=ax,style='ko--')

    314.jpg

    1

    2

    3

    4

    fig, axes = plt.subplots(2, 1)

    data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))

    data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7)

    data.plot(kind='barh', ax=axes[1], color='k', alpha=0.7)

    51.jpg

    1

    2

    3

    4

    df = DataFrame(np.random.randn(10, 4).cumsum(0),

                   columns=['A', 'B', 'C', 'D'],

                   index=np.arange(0, 100, 10))

    df

               A         B         C         D
    0  -0.523822  1.061179 -0.882215 -0.267718
    10 -0.178175 -0.367573 -1.465189 -1.095390
    20  0.276166  0.816511 -0.344557  1.297281
    30  0.529400  0.159374 -2.765168  1.784692
    40 -1.129003 -1.665272 -2.746512  3.140976
    50  0.265113 -1.821224 -5.140850  2.377449
    60 -2.699879 -3.895255 -5.011561  1.715174
    70 -2.384257 -3.480928 -4.519131  2.805369
    80 -2.525243 -3.031608 -4.840125  1.106624
    90 -2.020589 -3.519473 -4.823292  0.522323

    1

    df.plot() # 列索引为图例,行索引为横坐标,值为纵坐标

    44.jpg

    1

    2

    3

    4

    df = DataFrame(np.random.randint(0,2,(10, 2)),

                   columns=['A', 'B'],

                   index=np.arange(0, 10, 1))

    df

       A  B
    0  0  1
    1  0  1
    2  1  0
    3  0  1
    4  1  0
    5  1  0
    6  1  1
    7  0  0
    8  1  0
    9  1  0

    1

    df.plot(kind='bar')

    58.jpg

    1

    df.A.value_counts().plot(kind='bar')

    15.jpg

    1

    2

    df.A[df.B == 1].plot(kind='kde')  

    df.A[df.B == 0].plot(kind='kde')    # 密度图

    33.jpg

    1

    2

    3

    4

    df = DataFrame(np.random.rand(6, 4),

                   index=['one', 'two', 'three', 'four', 'five', 'six'],

                   columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))

    df

    Genus         A         B         C         D
    one    0.760750  0.951159  0.643181  0.792940
    two    0.137294  0.005417  0.685668  0.858801
    three  0.257455  0.721973  0.968951  0.043061
    four   0.298100  0.121293  0.400658  0.236369
    five   0.463919  0.537055  0.675918  0.487098
    six    0.798676  0.239188  0.915583  0.456184

    1

    df.plot(kind='bar',stacked=True) #行索引:横坐标

    24.jpg

    1

    2

    3

    values = Series(np.random.normal(0, 1, size=200))

    values.hist(bins=100, alpha=0.3, color='k', density=True) # 新版matplotlib已移除normed参数,改用density

    values.plot(kind='kde', style='k--')

    59.jpg

    1

    2

    3

    4

    df = DataFrame(np.random.randn(10,2),

                   columns=['A', 'B'],

                   index=np.arange(0, 10, 1))

    df

    1

    plt.scatter(df.A, df.B)

    06.jpg

    更多python相关文章请关注python自学网

    专题推荐:可视化
    上一篇:jieba库的运用 下一篇:seaborn绘制各种图形

    相关文章推荐

    • Python与Echarts相结合的可视化工具:pyecharts

    全部评论我要评论

    © 2021 Python学习网 苏ICP备2021003149号-1

  • 取消发布评论
  • 

    Python学习网