# Pandas

## pandas import

In [1]:
import pandas as pd

In [2]:
import numpy as np

## create pandas Series

In [4]:
# Build an integer Series from a zero-dimensional NumPy array; the result
# holds a single value under the default index label 0.
a = pd.Series(data=np.array(100), dtype=int)
a

0    100
dtype: int64

## create pandas Series with data type and index

In [5]:
# Attach explicit string labels to the values instead of the default
# 0..n-1 integer index, and request an integer dtype.
b = pd.Series(
    data=[1, 2, 3, 4],
    index=['a', 'b', 'c', 'd'],
    dtype=int,
)
b

a    1
b    2
c    3
d    4
dtype: int64

## create pandas Series from dictionary

In [6]:
# A dict maps directly onto a Series: the keys become the index labels and
# the values become the data.
data = dict(a=0., b=1., c=2.)
s = pd.Series(data=data)
s

a    0.0
b    1.0
c    2.0
dtype: float64

## get pandas Series value by integer position

In [7]:
# Positional access: .iloc always interprets the integer as a position.
# Plain `s[2]` on a string-labelled Series relies on a deprecated fallback
# from label lookup to positional lookup.
s.iloc[2]

2.0

## get pandas Series value by index label

In [8]:
# Label-based access: look the value up by its index label.
s.loc['c']

2.0

## create pandas DataFrame from list, and set column names

In [9]:
# Each inner list is one row of the table; `columns` names the fields in the
# same order as the values inside each row.
data = [
    ['Alex', 10],
    ['Bob', 12],
    ['Clarke', 13],
]
df = pd.DataFrame(data=data, columns=['Name', 'Age'])
df

Unnamed: 0,Name,Age
0,Alex,10
1,Bob,12
2,Clarke,13


## get pandas column names

In [20]:
# Column labels of the DataFrame, returned as a pandas Index.
df.columns

Index(['Name', 'Age'], dtype='object')

## get pandas shape

In [10]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape

(3, 2)

## get pandas row index

In [11]:
# Row labels of the DataFrame. No index was supplied when df was built, so
# this is the default RangeIndex.
df.index

RangeIndex(start=0, stop=3, step=1)

## get pandas data by column names

In [12]:
# Indexing with a list of column names returns a DataFrame containing those
# columns in the order they are listed (here 'Age' before 'Name').
df[['Age', 'Name']]

Unnamed: 0,Age,Name
0,10,Alex
1,12,Bob
2,13,Clarke


## get pandas data by row indexes

In [13]:
# Label-based row slice: every row from index label 1 onward.
df.loc[1:]

Unnamed: 0,Name,Age
1,Bob,12
2,Clarke,13


## get pandas cell data by row/column

In [17]:
# Select a single cell with one .loc call (row label, column label).
# This avoids chained indexing, which first builds an intermediate row Series
# and then indexes into it.
df.loc[1, 'Age']

12

## read from CSV file

In [18]:
# Load demo.csv into a DataFrame. The relative path is resolved against the
# kernel's current working directory, so the file must sit there.
df_demo = pd.read_csv('demo.csv')

### print the data frame

In [19]:
# Display the loaded frame using the notebook's rich table rendering.
df_demo

Unnamed: 0,Mailbox,Notes ID,Team Leader,Github account,Session 1,Unnamed: 5,Session 2,Unnamed: 7,Session 3,Unnamed: 9,Session 4,Unnamed: 11,Session 5,Unnamed: 13,Session 6,Unnamed: 15,Session 7,Unnamed: 17,Session 8
0,pdlzhang@cn.ibm.com,Ping DZ Zhang/China/IBM,Cheng Chen,Y,Y,Incorrect,Y,N,Y,N,Y,N,Y,,N,,Y,,Y
1,yananzh@cn.ibm.com,Ya Nan YN Zhuang/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,N 项目会议冲突,,Y,,Y
2,lizhjdl@cn.ibm.com,Zhen Jie DL Li/China/IBM,Cheng Chen,Y,Y,Done,N shift,Done,N shift,Done,N shift,Done,N shift,,N shift,,N shift,,N shift
3,yuanlyy@cn.ibm.com,Yuan Yuan YB Li/China/IBM,Cheng Chen,Y,Y,Done,Y,Incorrect,Y,Done,N 项目会议冲突,Incorrect,Y,,Y,,N,,Y
4,shuyang@cn.ibm.com,Yang Shu/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,Y,,Y,,N会议冲突
5,zhouyldl@cn.ibm.com,Yi Li AZ Zhou/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,N 项目会议冲突,Done,Y,,Y,,Y,,Y
6,lizidl@cn.ibm.com,Zi DL Li/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,Y,,Y,,Y
7,wangyih@cn.ibm.com,Yi Han HY Wang/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Incorrect,Y,,Y,,Y,,Y
8,guoqq@cn.ibm.com,Qian Qian QQ Guo/China/IBM,Yin Lu,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,Y,,Y,,Y
9,shangx@cn.ibm.com,Xia Shang/China/IBM,Yin Lu,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,Y,,Y,,Y


## print the head rows

In [20]:
# Show the first rows of the frame (five when no count is given).
df_demo.head()

Unnamed: 0,Mailbox,Notes ID,Team Leader,Github account,Session 1,Unnamed: 5,Session 2,Unnamed: 7,Session 3,Unnamed: 9,Session 4,Unnamed: 11,Session 5,Unnamed: 13,Session 6,Unnamed: 15,Session 7,Unnamed: 17,Session 8
0,pdlzhang@cn.ibm.com,Ping DZ Zhang/China/IBM,Cheng Chen,Y,Y,Incorrect,Y,N,Y,N,Y,N,Y,,N,,Y,,Y
1,yananzh@cn.ibm.com,Ya Nan YN Zhuang/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,N 项目会议冲突,,Y,,Y
2,lizhjdl@cn.ibm.com,Zhen Jie DL Li/China/IBM,Cheng Chen,Y,Y,Done,N shift,Done,N shift,Done,N shift,Done,N shift,,N shift,,N shift,,N shift
3,yuanlyy@cn.ibm.com,Yuan Yuan YB Li/China/IBM,Cheng Chen,Y,Y,Done,Y,Incorrect,Y,Done,N 项目会议冲突,Incorrect,Y,,Y,,N,,Y
4,shuyang@cn.ibm.com,Yang Shu/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,Y,,Y,,N会议冲突


## print the tail rows

In [21]:
# Show the last rows of the frame (five when no count is given).
df_demo.tail()

Unnamed: 0,Mailbox,Notes ID,Team Leader,Github account,Session 1,Unnamed: 5,Session 2,Unnamed: 7,Session 3,Unnamed: 9,Session 4,Unnamed: 11,Session 5,Unnamed: 13,Session 6,Unnamed: 15,Session 7,Unnamed: 17,Session 8
26,caijm@cn.ibm.com,Jin Ming JM Cai/China/IBM,Wu Dan,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,Y,,Y,,Y
27,lidannie@cn.ibm.com,Li Dan N Nie/China/IBM,Wu Dan,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,Y,,Y,,Y
28,dongmhtu@cn.ibm.com,Mei Hua TU Dong/China/IBM,Wu Dan,Y,Y,Done,Y,Done,Y,Incorrect,Y,Incorrect,Y,,Y,,Y,,Y
29,zhangwyi@cn.ibm.com,Wei Yi WY Zhang/China/IBM,Wu Dan,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,,Y,,Y,,Y
30,zhanglp@cn.ibm.com,Lian Ping Zhang/China/IBM,Heng Cai,（四年经验（以后答疑））,,,,,,,,,,,,,,,


## get one column

In [22]:
# Selecting a single column by name returns it as a Series.
df_demo['Mailbox']

0     pdlzhang@cn.ibm.com
1      yananzh@cn.ibm.com
2      lizhjdl@cn.ibm.com
3      yuanlyy@cn.ibm.com
4      shuyang@cn.ibm.com
5     zhouyldl@cn.ibm.com
6       lizidl@cn.ibm.com
7      wangyih@cn.ibm.com
8        guoqq@cn.ibm.com
9       shangx@cn.ibm.com
10     xinzhdl@cn.ibm.com
11      ymying@cn.ibm.com
12    jmhjiang@cn.ibm.com
13    xiaoywdl@cn.ibm.com
14      wpeter@cn.ibm.com
15     issczli@cn.ibm.com
16      zoujin@cn.ibm.com
17       fygao@cn.ibm.com
18    dllsyliu@cn.ibm.com
19      dlwjia@cn.ibm.com
20       ssliu@cn.ibm.com
21     fenghao@cn.ibm.com
22     xhongdl@cn.ibm.com
23    haojiedl@cn.ibm.com
24    mumeilun@cn.ibm.com
25     jinglil@cn.ibm.com
26       caijm@cn.ibm.com
27    lidannie@cn.ibm.com
28    dongmhtu@cn.ibm.com
29    zhangwyi@cn.ibm.com
30     zhanglp@cn.ibm.com
Name: Mailbox, dtype: object

## get part of the columns

In [23]:
# A list of column names selects several columns and returns a DataFrame.
df_demo[['Mailbox', 'Notes ID']]

Unnamed: 0,Mailbox,Notes ID
0,pdlzhang@cn.ibm.com,Ping DZ Zhang/China/IBM
1,yananzh@cn.ibm.com,Ya Nan YN Zhuang/China/IBM
2,lizhjdl@cn.ibm.com,Zhen Jie DL Li/China/IBM
3,yuanlyy@cn.ibm.com,Yuan Yuan YB Li/China/IBM
4,shuyang@cn.ibm.com,Yang Shu/China/IBM
5,zhouyldl@cn.ibm.com,Yi Li AZ Zhou/China/IBM
6,lizidl@cn.ibm.com,Zi DL Li/China/IBM
7,wangyih@cn.ibm.com,Yi Han HY Wang/China/IBM
8,guoqq@cn.ibm.com,Qian Qian QQ Guo/China/IBM
9,shangx@cn.ibm.com,Xia Shang/China/IBM


## data cleaning: fill in missing data

In [24]:
# Replace every missing value with 'N'. Rebinding the returned frame, rather
# than using inplace=True, makes the data flow explicit and keeps the call
# usable in a method chain.
df_demo = df_demo.fillna('N')

In [25]:
# Display the frame again to confirm that formerly empty cells now read 'N'.
df_demo

Unnamed: 0,Mailbox,Notes ID,Team Leader,Github account,Session 1,Unnamed: 5,Session 2,Unnamed: 7,Session 3,Unnamed: 9,Session 4,Unnamed: 11,Session 5,Unnamed: 13,Session 6,Unnamed: 15,Session 7,Unnamed: 17,Session 8
0,pdlzhang@cn.ibm.com,Ping DZ Zhang/China/IBM,Cheng Chen,Y,Y,Incorrect,Y,N,Y,N,Y,N,Y,N,N,N,Y,N,Y
1,yananzh@cn.ibm.com,Ya Nan YN Zhuang/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,N,N 项目会议冲突,N,Y,N,Y
2,lizhjdl@cn.ibm.com,Zhen Jie DL Li/China/IBM,Cheng Chen,Y,Y,Done,N shift,Done,N shift,Done,N shift,Done,N shift,N,N shift,N,N shift,N,N shift
3,yuanlyy@cn.ibm.com,Yuan Yuan YB Li/China/IBM,Cheng Chen,Y,Y,Done,Y,Incorrect,Y,Done,N 项目会议冲突,Incorrect,Y,N,Y,N,N,N,Y
4,shuyang@cn.ibm.com,Yang Shu/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,N,Y,N,Y,N,N会议冲突
5,zhouyldl@cn.ibm.com,Yi Li AZ Zhou/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,N 项目会议冲突,Done,Y,N,Y,N,Y,N,Y
6,lizidl@cn.ibm.com,Zi DL Li/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,N,Y,N,Y,N,Y
7,wangyih@cn.ibm.com,Yi Han HY Wang/China/IBM,Cheng Chen,Y,Y,Done,Y,Done,Y,Done,Y,Incorrect,Y,N,Y,N,Y,N,Y
8,guoqq@cn.ibm.com,Qian Qian QQ Guo/China/IBM,Yin Lu,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,N,Y,N,Y,N,Y
9,shangx@cn.ibm.com,Xia Shang/China/IBM,Yin Lu,Y,Y,Done,Y,Done,Y,Done,Y,Done,Y,N,Y,N,Y,N,Y
