## pyplot 1
## 예제 6_1
### test1 (자료 가져오기)
```python
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# Configure matplotlib so Korean (Hangul) text renders in titles and labels.
from matplotlib import font_manager, rc
font_path = "NanumGothic.ttf"
# Register the local .ttf with matplotlib's font manager. Without this the
# family name set below only resolves if the font is also installed
# system-wide, and matplotlib silently falls back to its default font.
font_manager.fontManager.addfont(font_path)
# Read the family name stored inside the font file and make it the default.
font = font_manager.FontProperties(fname=font_path).get_name()
rc('font', family=font)
# Tip: to locate the active matplotlibrc file, run the two lines below.
# import matplotlib as mpl
# mpl.matplotlib_fname()
def testplot1():
ind_path = './indicators.csv'
dt_ind = {
'CountryName':np.unicode_,
'CountryCode':np.unicode_,
'IndicatorName':np.unicode_,
'IndicatorCode':np.unicode_,
'Year':np.int16,
'Value':np.float64
}
df_ind = pd.read_csv(ind_path,dtype=dt_ind,sep=',')
print(df_ind.head())
print('========')
uniq_countries = df_ind['CountryName'].unique()
num_ctry = uniq_countries.shape[0]
print('num of country :',num_ctry)
print(len(uniq_countries))
min_yr, max_yr = min(df_ind['Year']),max(df_ind['Year'])
print('year',min_yr,'to',max_yr)
return None
```
```
CountryName CountryCode ... Year Value
0 Arab World ARB ... 1960 1.335609e+02
1 Arab World ARB ... 1960 8.779760e+01
2 Arab World ARB ... 1960 6.634579e+00
3 Arab World ARB ... 1960 8.102333e+01
4 Arab World ARB ... 1960 3.000000e+06
[5 rows x 6 columns]
========
num of country : 247
247
year 1960 to 2015
```
### test2 (bar)
```python
def testplot2(ind_path='./indicators.csv', code='JPN'):
    """Show a bar chart of one country's CO2 emissions by year.

    Args:
        ind_path: Path of the indicators CSV file.
        code: ``CountryCode`` value to plot. Defaults to ``'JPN'``, the
            country this example has always plotted.

    Returns:
        None. The chart is displayed with ``plt.show()``.
    """
    # ``str`` replaces ``np.unicode_``, which was removed in NumPy 2.0.
    dt_ind = {
        'CountryName': str,
        'CountryCode': str,
        'IndicatorName': str,
        'IndicatorCode': str,
        'Year': np.int16,
        'Value': np.float64,
    }
    df_ind = pd.read_csv(ind_path, dtype=dt_ind, sep=',')

    # Raw string: the pattern is a regex and \( matches a literal "(".
    ind_co2 = r'CO2 emissions \(metric'
    # cond1 & cond2 ==> CO2 emissions indicator & selected country
    cond1 = df_ind['IndicatorName'].str.contains(ind_co2, na=False)
    # Exact comparison: a country code is an identifier, not a pattern.
    cond2 = df_ind['CountryCode'] == code
    res = df_ind[cond1 & cond2]

    years = res['Year'].values
    co2 = res['Value'].values
    plt.bar(years, co2)
    plt.show()
    return None
```
### test3 (graph)
```python
def testplot3(ind_path='./indicators.csv', code='JPN'):
    """Show a labelled line chart of one country's CO2 emissions by year.

    Args:
        ind_path: Path of the indicators CSV file.
        code: ``CountryCode`` value to plot. Defaults to ``'JPN'``, the
            country this example has always plotted.

    Returns:
        None. The chart is displayed with ``plt.show()``.

    Raises:
        ValueError: If no CO2 emission rows exist for ``code``.
    """
    # ``str`` replaces ``np.unicode_``, which was removed in NumPy 2.0.
    dt_ind = {
        'CountryName': str,
        'CountryCode': str,
        'IndicatorName': str,
        'IndicatorCode': str,
        'Year': np.int16,
        'Value': np.float64,
    }
    df_ind = pd.read_csv(ind_path, dtype=dt_ind, sep=',')

    # Raw string: the pattern is a regex and \( matches a literal "(".
    ind_co2 = r'CO2 emissions \(metric'
    # cond1 & cond2 ==> CO2 emissions indicator & selected country
    cond1 = df_ind['IndicatorName'].str.contains(ind_co2, na=False)
    cond2 = df_ind['CountryCode'] == code
    res = df_ind[cond1 & cond2]
    if res.empty:
        # Fail with a clear message instead of an IndexError from .iloc[0].
        raise ValueError(f'no CO2 emission rows found for country code {code!r}')

    years = res['Year'].values
    co2 = res['Value'].values

    # Line chart instead of the bar chart used in testplot2.
    plt.plot(years, co2)

    # Label the axes; the y label is the full indicator name from the data.
    plt.xlabel('Year')
    plt.ylabel(res['IndicatorName'].iloc[0], fontsize=15)

    # Title of the graph; the Hangul text checks that the Korean font works.
    plt.title('CO2 Emissions in 한글 ' + str(code))

    # Set the axis ranges as [xmin, xmax, ymin, ymax].
    plt.axis([1959, 2011, 0, 25])
    plt.show()
    return None
```
### test4 (histogram)
```python
def testplot4(ind_path='./indicators.csv', within_one_std=False):
    """Show a histogram of Korea's yearly CO2 emission values.

    Args:
        ind_path: Path of the indicators CSV file.
        within_one_std: When true, keep only values within one standard
            deviation of the mean before plotting. Defaults to ``False``,
            which plots every value as this example has always done.

    Returns:
        None. The values are printed and the chart is displayed with
        ``plt.show()``.

    Raises:
        ValueError: If no CO2 emission rows exist for the country.
    """
    # ``str`` replaces ``np.unicode_``, which was removed in NumPy 2.0.
    dt_ind = {
        'CountryName': str,
        'CountryCode': str,
        'IndicatorName': str,
        'IndicatorCode': str,
        'Year': np.int16,
        'Value': np.float64,
    }
    df_ind = pd.read_csv(ind_path, dtype=dt_ind, sep=',')

    # Raw string: the pattern is a regex and \( matches a literal "(".
    ind_co2 = r'CO2 emissions \(metric'
    code = 'KOR'
    # cond1 & cond2 ==> CO2 emissions indicator & Korea
    cond1 = df_ind['IndicatorName'].str.contains(ind_co2, na=False)
    cond2 = df_ind['CountryCode'] == code
    co2_res = df_ind[cond1 & cond2]
    if co2_res.empty:
        # Fail with a clear message instead of an IndexError from .iloc[0].
        raise ValueError(f'no CO2 emission rows found for country code {code!r}')

    values = co2_res['Value']
    if within_one_std:
        # Use only the values inside [mean - std, mean + std] (inclusive).
        l_bnd = values.mean() - values.std()
        u_bnd = values.mean() + values.std()
        values = values[values.between(l_bnd, u_bnd)]
    hgram_data = values.values
    print(hgram_data)

    plt.hist(hgram_data, bins=20, density=False, histtype='bar', color='red')
    plt.xlabel(co2_res['IndicatorName'].iloc[0])
    plt.ylabel('Years')
    plt.title('CO2 Production Distribution in Korea')
    plt.show()
    return None
```
### test5 (annotate)
```python
def testplot5(ind_path='./indicators.csv'):
    """Show an annotated histogram of every country's 2011 CO2 emissions.

    Args:
        ind_path: Path of the indicators CSV file.

    Returns:
        None. The selected rows are printed and the chart is displayed
        with ``plt.show()``.

    Raises:
        ValueError: If the file has no CO2 emission rows for 2011.
    """
    # ``str`` replaces ``np.unicode_``, which was removed in NumPy 2.0.
    dt_ind = {
        'CountryName': str,
        'CountryCode': str,
        'IndicatorName': str,
        'IndicatorCode': str,
        'Year': np.int16,
        'Value': np.float64,
    }
    df_ind = pd.read_csv(ind_path, dtype=dt_ind, sep=',')

    # Raw string: the pattern is a regex and \( matches a literal "(".
    ind_co2 = r'CO2 emissions \(metric'
    years = [2011]
    # cond1 & cond2 ==> CO2 emissions indicator & selected year(s)
    cond1 = df_ind['IndicatorName'].str.contains(ind_co2, na=False)
    cond2 = df_ind['Year'].isin(years)
    co2_2011_all = df_ind[cond1 & cond2]
    print(co2_2011_all)
    if co2_2011_all.empty:
        # Fail with a clear message instead of an IndexError from .iloc[0].
        raise ValueError(f'no CO2 emission rows found for year(s) {years}')

    # subplots() returns a figure that can hold several axes; one is used here.
    fig, axis = plt.subplots()
    axis.set_xlabel(co2_2011_all['IndicatorName'].iloc[0])
    axis.set_ylabel('# of Countries')
    axis.hist(co2_2011_all['Value'], bins=20, density=False, color='magenta')
    fig.suptitle('CO2 Emissions per capita')

    # Annotating: ax.annotate(text, xy, xytext, xycoords, arrowprops)
    #   text       : the text of the annotation
    #   xy         : the point (x, y) to annotate
    #   xytext     : the point (x, y) where the text is placed
    #   xycoords   : coordinate system of xy
    #   arrowprops : properties of the arrow drawn from xytext to xy
    # The coordinates are hard-coded for this dataset.
    axis.annotate(
        'Korea',
        xy=(11, 7),
        xytext=(13, 13),
        xycoords='data',
        arrowprops=dict(arrowstyle='->'),
    )
    plt.show()
    return None
```
### test6 (scatter & correlation)
```python
def testplot6(ind_path='./indicators.csv', code='KOR'):
    """Scatter-plot CO2 emissions against GDP per capita and print their
    correlation matrix.

    The two indicators are paired by year, so each point is one year and
    both series always have the same length.

    Args:
        ind_path: Path of the indicators CSV file.
        code: ``CountryCode`` value to analyse. Defaults to ``'KOR'``.

    Returns:
        None. The chart is displayed with ``plt.show()`` and the 2x2
        correlation matrix from ``np.corrcoef`` is printed afterwards.

    Raises:
        ValueError: If ``code`` has no year with both indicators present.
    """
    # ``str`` replaces ``np.unicode_``, which was removed in NumPy 2.0.
    dt_ind = {
        'CountryName': str,
        'CountryCode': str,
        'IndicatorName': str,
        'IndicatorCode': str,
        'Year': np.int16,
        'Value': np.float64,
    }
    df_ind = pd.read_csv(ind_path, dtype=dt_ind, sep=',')

    # Raw strings: the patterns are regexes and \( matches a literal "(".
    ind_co2 = r'CO2 emissions \(metric'
    ind_gdp = r'GDP per capita \(constant 2005'

    is_country = df_ind['CountryCode'] == code
    is_co2 = df_ind['IndicatorName'].str.contains(ind_co2, na=False)
    is_gdp = df_ind['IndicatorName'].str.contains(ind_gdp, na=False)
    co2_res = df_ind[is_co2 & is_country]
    gdp_res = df_ind[is_gdp & is_country]
    # Only years before 2012 are used, as in the original example.
    gdp_res_to_2011 = gdp_res[gdp_res['Year'] < 2012]

    # Pair the indicators year by year. Taking the raw columns would
    # mis-pair points or fail whenever the two series cover different years.
    paired = pd.merge(
        gdp_res_to_2011[['Year', 'Value']],
        co2_res[['Year', 'Value']],
        on='Year',
        suffixes=('_gdp', '_co2'),
    )
    if paired.empty:
        raise ValueError(f'no overlapping CO2/GDP years for country code {code!r}')

    # subplots() returns a figure that can hold several axes; one is used here.
    fig, axis = plt.subplots()
    axis.set_title('CO2 emissions vs. GDP (per capita)')
    axis.set_xlabel(gdp_res_to_2011['IndicatorName'].iloc[0])
    axis.set_ylabel(co2_res['IndicatorName'].iloc[0])

    x_data = paired['Value_gdp'].values
    y_data = paired['Value_co2'].values
    axis.scatter(x_data, y_data)
    plt.show()

    # Correlation matrix of the two series.
    R = np.corrcoef(x_data, y_data)
    print(R)
    return None
```
```
[[1. 0.98102799]
[0.98102799 1. ]]
```
### test7 (boxplot)
```python
def testplot7(ind_path='./indicators.csv', countries=('KOR', 'USA')):
    """Compare the CO2 emission distributions of several countries with
    notched box plots.

    Args:
        ind_path: Path of the indicators CSV file.
        countries: ``CountryCode`` values to compare, one box per code.
            Defaults to ``('KOR', 'USA')``.

    Returns:
        None. The chart is displayed with ``plt.show()``.

    Raises:
        ValueError: If any requested country has no CO2 emission rows.
    """
    # ``str`` replaces ``np.unicode_``, which was removed in NumPy 2.0.
    dt_ind = {
        'CountryName': str,
        'CountryCode': str,
        'IndicatorName': str,
        'IndicatorCode': str,
        'Year': np.int16,
        'Value': np.float64,
    }
    df_ind = pd.read_csv(ind_path, dtype=dt_ind, sep=',')
    countries = list(countries)

    # Raw string: the pattern is a regex and \( matches a literal "(".
    ind_co2 = r'CO2 emissions \(metric'
    cond_co2 = df_ind['IndicatorName'].str.contains(ind_co2, na=False)
    df_co2 = df_ind[cond_co2]

    # One array per country. A list of arrays, unlike DataFrame columns,
    # does not require every country to have the same number of rows.
    co2_emissions = [
        df_co2[df_co2['CountryCode'] == ctr]['Value'].values
        for ctr in countries
    ]
    missing = [ctr for ctr, vals in zip(countries, co2_emissions) if len(vals) == 0]
    if missing:
        raise ValueError(f'no CO2 emission rows found for country code(s) {missing}')

    fig, axis = plt.subplots(figsize=(10, 8))
    axis.boxplot(co2_emissions, notch=True, patch_artist=True)
    # Label each box with its country code instead of the default 1, 2, ...
    axis.set_xticklabels(countries)
    axis.set_xlabel('Countries')
    axis.set_ylabel(df_co2['IndicatorName'].iloc[0])
    axis.set_title('CO2 emissions comparison')
    plt.show()
    return None
def main():
    """Run the selected demos in order.

    Only the entries left uncommented in ``selected`` are executed; enable
    another demo by removing the ``#`` in front of its name.
    """
    selected = (
        testplot1,
        # testplot2,
        # testplot3,
        # testplot4,
        # testplot5,
        # testplot6,
        # testplot7,
    )
    for demo in selected:
        demo()


# Run the demos only when this file is executed as a script.
if __name__ == '__main__':
    main()
```
0 댓글