JuJuKwakKwak
Day7 - Code
101_Tips_Titanic¶
Part 1 - Exploring Data¶
In [1]:
import numpy as np # numerical computation
import pandas as pd # data tables
# visualization libraries
# render plots inline in the Jupyter notebook
%matplotlib inline
import matplotlib.pyplot as plt # plotting (pie charts, bar charts, etc.)
plt.style.use('seaborn-whitegrid') # apply a style (renamed 'seaborn-v0_8-whitegrid' in matplotlib >= 3.6)
import seaborn as sns # statistical visualization
import missingno # visualizing null values
In [2]:
# machine learning : data preprocessing -> training (train) -> model -> inference -> a good model
test = pd.read_csv('C:\\Users\\KJW\\Desktop\\python_basic\\resources\\titanic\\data_titanic\\test.csv')
train = pd.read_csv('C:\\Users\\KJW\\Desktop\\python_basic\\resources\\titanic\\data_titanic\\train.csv')
Part 2 - Indexing¶
In [3]:
# No. 25 - If there are many columns, you can adjust how many of them are displayed
In [4]:
# raise the pandas column display limit
pd.set_option("display.max_columns", 12)
train.head()
Out[4]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
Part 3 - Sorting¶
In [5]:
# No. 28 - Sorting by a column and slicing certain rows
In [6]:
train.sort_values('Fare', ascending=True)[101:105]
Out[6]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
421 | 422 | 0 | 3 | Charters, Mr. David | male | 21.0 | 0 | 0 | A/5. 13032 | 7.7333 | NaN | Q |
156 | 157 | 1 | 3 | Gilnagh, Miss. Katherine "Katie" | female | 16.0 | 0 | 0 | 35851 | 7.7333 | NaN | Q |
778 | 779 | 0 | 3 | Kilgannon, Mr. Thomas J | male | NaN | 0 | 0 | 36865 | 7.7375 | NaN | Q |
727 | 728 | 1 | 3 | Mannion, Miss. Margareth | female | NaN | 0 | 0 | 36866 | 7.7375 | NaN | Q |
In [7]:
# No. 30 - Filtering rows by a condition on a column
In [8]:
train[train['Fare'] > 80].head()
Out[8]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
27 | 28 | 0 | 1 | Fortune, Mr. Charles Alexander | male | 19.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
31 | 32 | 1 | 1 | Spencer, Mrs. William Augustus (Marie Eugenie) | female | NaN | 1 | 0 | PC 17569 | 146.5208 | B78 | C |
34 | 35 | 0 | 1 | Meyer, Mr. Edgar Joseph | male | 28.0 | 1 | 0 | PC 17604 | 82.1708 | NaN | C |
62 | 63 | 0 | 1 | Harris, Mr. Henry Birkhardt | male | 45.0 | 1 | 0 | 36973 | 83.4750 | C83 | S |
88 | 89 | 1 | 1 | Fortune, Miss. Mabel Helen | female | 23.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
In [9]:
# No. 32 - Sort Columns by alphabetical order
In [10]:
train.sort_index(axis=1).head() # axis=0 (rows), axis=1 (columns)
Out[10]:
Age | Cabin | Embarked | Fare | Name | Parch | PassengerId | Pclass | Sex | SibSp | Survived | Ticket | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 22.0 | NaN | S | 7.2500 | Braund, Mr. Owen Harris | 0 | 1 | 3 | male | 1 | 0 | A/5 21171 |
1 | 38.0 | C85 | C | 71.2833 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 0 | 2 | 1 | female | 1 | 1 | PC 17599 |
2 | 26.0 | NaN | S | 7.9250 | Heikkinen, Miss. Laina | 0 | 3 | 3 | female | 0 | 1 | STON/O2. 3101282 |
3 | 35.0 | C123 | S | 53.1000 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 0 | 4 | 1 | female | 1 | 1 | 113803 |
4 | 35.0 | NaN | S | 8.0500 | Allen, Mr. William Henry | 0 | 5 | 3 | male | 0 | 0 | 373450 |
In [11]:
# No. 33 - Multi Column filtering
In [12]:
train[(train.Age >= 30) & (train.Sex == 'female')].head()
Out[12]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
11 | 12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
15 | 16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.0 | 0 | 0 | 248706 | 16.0000 | NaN | S |
18 | 19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Maria Vande... | female | 31.0 | 1 | 0 | 345763 | 18.0000 | NaN | S |
In [13]:
# No. 34 - isin - filtering by conditions
In [14]:
train[train.Embarked.isin(['C'])].head()
Out[14]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
9 | 10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
19 | 20 | 1 | 3 | Masselmani, Mrs. Fatima | female | NaN | 0 | 0 | 2649 | 7.2250 | NaN | C |
26 | 27 | 0 | 3 | Emir, Mr. Farred Chehab | male | NaN | 0 | 0 | 2631 | 7.2250 | NaN | C |
30 | 31 | 0 | 1 | Uruchurtu, Don. Manuel E | male | 40.0 | 0 | 0 | PC 17601 | 27.7208 | NaN | C |
In [15]:
train[train.Embarked.isin(['S'])].head()
Out[15]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
6 | 7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
In [16]:
# No. 35 - Filtering by multiple values in a column
In [17]:
train[train.Cabin.isin(['C50', 'C85'])]
Out[17]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
879 | 880 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
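As a side note, `isin` composes with `~` to express "not in". A minimal sketch on a toy frame (the column values here are made up):

```python
import pandas as pd

df = pd.DataFrame({'Embarked': ['S', 'C', 'Q', 'C', 'S']})

# ~ inverts the boolean mask: keep rows whose Embarked is NOT in the list
not_c = df[~df.Embarked.isin(['C'])]
print(not_c.Embarked.tolist())
```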
In [18]:
# No. 36 - isin - filtering by multiple conditions across multiple columns
In [19]:
train[train.Ticket.isin(['113572']) & train.Parch.isin([0])]
Out[19]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
61 | 62 | 1 | 1 | Icard, Miss. Amelie | female | 38.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
829 | 830 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
In [20]:
# No. 37 - Counting rows that match a condition
In [21]:
len(train[train.Fare == 80.0])
Out[21]:
2
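The same count can be taken without building the filtered frame, by summing the boolean mask directly. A small sketch on synthetic data:

```python
import pandas as pd

df = pd.DataFrame({'Fare': [80.0, 7.25, 80.0, 53.1]})

# len() on the filtered frame and .sum() on the mask give the same count,
# because True sums as 1
count_len = len(df[df.Fare == 80.0])
count_sum = int((df.Fare == 80.0).sum())
print(count_len, count_sum)
```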
Part 4 - Creating¶
In [22]:
# No. 41 - Making a new empty DataFrame
In [23]:
sample = pd.DataFrame()
sample
# e.g. as a container when crawling data
Out[23]:
In [24]:
# No. 42 - Copying and creating a new DataFrame
In [25]:
temp = train.copy() # save as a new variable, detached from the original data
temp.head()
Out[25]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
In [26]:
# No. 43 - Creating a new column
In [27]:
# add a new column at the end
temp['New'] = 0
temp.head()
Out[27]:
PassengerId | Survived | Pclass | Name | Sex | Age | ... | Parch | Ticket | Fare | Cabin | Embarked | New | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | ... | 0 | A/5 21171 | 7.2500 | NaN | S | 0 |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | ... | 0 | PC 17599 | 71.2833 | C85 | C | 0 |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | ... | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | 0 |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | ... | 0 | 113803 | 53.1000 | C123 | S | 0 |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | ... | 0 | 373450 | 8.0500 | NaN | S | 0 |
5 rows × 13 columns
In [28]:
# insert a new column at a specific position
temp.insert(0, 'New2', 0)
temp.head()
Out[28]:
New2 | PassengerId | Survived | Pclass | Name | Sex | ... | Parch | Ticket | Fare | Cabin | Embarked | New | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | ... | 0 | A/5 21171 | 7.2500 | NaN | S | 0 |
1 | 0 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | ... | 0 | PC 17599 | 71.2833 | C85 | C | 0 |
2 | 0 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | ... | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | 0 |
3 | 0 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | ... | 0 | 113803 | 53.1000 | C123 | S | 0 |
4 | 0 | 5 | 0 | 3 | Allen, Mr. William Henry | male | ... | 0 | 373450 | 8.0500 | NaN | S | 0 |
5 rows × 14 columns
In [29]:
# No. 44 - Creating a new column by copying an existing column
In [30]:
temp['New'] = temp['Age']
temp.head()
Out[30]:
New2 | PassengerId | Survived | Pclass | Name | Sex | ... | Parch | Ticket | Fare | Cabin | Embarked | New | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | ... | 0 | A/5 21171 | 7.2500 | NaN | S | 22.0 |
1 | 0 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | ... | 0 | PC 17599 | 71.2833 | C85 | C | 38.0 |
2 | 0 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | ... | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | 26.0 |
3 | 0 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | ... | 0 | 113803 | 53.1000 | C123 | S | 35.0 |
4 | 0 | 5 | 0 | 3 | Allen, Mr. William Henry | male | ... | 0 | 373450 | 8.0500 | NaN | S | 35.0 |
5 rows × 14 columns
In [31]:
# No. 45 - Creating a new Dataframe with a certain column
In [32]:
# dependent variable : the label, the y value, the answer
y_train = train['Survived']
y_train.head()
Out[32]:
0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64
In [33]:
# No. 46 - Creating a new Dataframe with certain rows
In [34]:
sample1 = train[0:3]
sample1
Out[34]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
In [35]:
# No. 47 - Creating a new Dataframe with certain columns
In [36]:
sample2 = train[['Pclass', 'Name', 'Sex']] # make sure there are two pairs of square brackets
sample2.head()
Out[36]:
Pclass | Name | Sex | |
---|---|---|---|
0 | 3 | Braund, Mr. Owen Harris | male |
1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female |
2 | 3 | Heikkinen, Miss. Laina | female |
3 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female |
4 | 3 | Allen, Mr. William Henry | male |
In [37]:
# No. 48 - Creating a new Dataframe with certain columns and certain rows
In [38]:
sample3 = train[['Pclass', 'Name', 'Sex']][1:3]
sample3
Out[38]:
Pclass | Name | Sex | |
---|---|---|---|
1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female |
2 | 3 | Heikkinen, Miss. Laina | female |
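Chained indexing like `train[cols][rows]` works for reading, but `.loc` can do both selections in one step and avoids the SettingWithCopy pitfalls if you later assign into the result. A sketch on a toy frame:

```python
import pandas as pd

df = pd.DataFrame({'Pclass': [3, 1, 3],
                   'Name': ['a', 'b', 'c'],
                   'Sex': ['male', 'female', 'female']})

# .loc slices by label and is end-inclusive: 1:2 selects rows 1 and 2
sample = df.loc[1:2, ['Pclass', 'Name', 'Sex']]
print(sample.shape)
```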
In [39]:
# No. 49 - Creating a new DataFrame with existing rows that match a certain condition
In [40]:
sample4 = train.loc[train.Survived.values == 1]
sample4.head()
Out[40]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
8 | 9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
9 | 10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
In [41]:
# No. 50 - Creating a new DataFrame with existing rows that match multiple conditions
In [42]:
sample5 = train.loc[(train.Survived.values == 1) & (train.Pclass.values == 3) & (train.Sex.values == 'male')]
sample5.head()
Out[42]:
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
36 | 37 | 1 | 3 | Mamee, Mr. Hanna | male | NaN | 0 | 0 | 2677 | 7.2292 | NaN | C |
65 | 66 | 1 | 3 | Moubarek, Master. Gerios | male | NaN | 1 | 1 | 2661 | 15.2458 | NaN | C |
74 | 75 | 1 | 3 | Bing, Mr. Lee | male | 32.0 | 0 | 0 | 1601 | 56.4958 | NaN | S |
81 | 82 | 1 | 3 | Sheerlinck, Mr. Jan Baptist | male | 29.0 | 0 | 0 | 345779 | 9.5000 | NaN | S |
107 | 108 | 1 | 3 | Moss, Mr. Albert Johan | male | NaN | 0 | 0 | 312991 | 7.7750 | NaN | S |
In [43]:
# No. 51 - Concat - Merging and creating a new data frame
In [44]:
# concatenate vertically, stacking rows
data = pd.concat((train, test), sort=True)
print(data.shape)
data.head()
(1309, 12)
Out[44]:
Age | Cabin | Embarked | Fare | Name | Parch | PassengerId | Pclass | Sex | SibSp | Survived | Ticket | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 22.0 | NaN | S | 7.2500 | Braund, Mr. Owen Harris | 0 | 1 | 3 | male | 1 | 0.0 | A/5 21171 |
1 | 38.0 | C85 | C | 71.2833 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 0 | 2 | 1 | female | 1 | 1.0 | PC 17599 |
2 | 26.0 | NaN | S | 7.9250 | Heikkinen, Miss. Laina | 0 | 3 | 3 | female | 0 | 1.0 | STON/O2. 3101282 |
3 | 35.0 | C123 | S | 53.1000 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 0 | 4 | 1 | female | 1 | 1.0 | 113803 |
4 | 35.0 | NaN | S | 8.0500 | Allen, Mr. William Henry | 0 | 5 | 3 | male | 0 | 0.0 | 373450 |
In [45]:
data.tail()
Out[45]:
Age | Cabin | Embarked | Fare | Name | Parch | PassengerId | Pclass | Sex | SibSp | Survived | Ticket | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
413 | NaN | NaN | S | 8.0500 | Spector, Mr. Woolf | 0 | 1305 | 3 | male | 0 | NaN | A.5. 3236 |
414 | 39.0 | C105 | C | 108.9000 | Oliva y Ocana, Dona. Fermina | 0 | 1306 | 1 | female | 0 | NaN | PC 17758 |
415 | 38.5 | NaN | S | 7.2500 | Saether, Mr. Simon Sivertsen | 0 | 1307 | 3 | male | 0 | NaN | SOTON/O.Q. 3101262 |
416 | NaN | NaN | S | 8.0500 | Ware, Mr. Frederick | 0 | 1308 | 3 | male | 0 | NaN | 359309 |
417 | NaN | NaN | C | 22.3583 | Peter, Master. Michael J | 1 | 1309 | 3 | male | 1 | NaN | 2668 |
Part 5 - Extracting¶
Part 6 - Grouping¶
In [46]:
# No. 62 - filtering small categories using nlargest()
In [47]:
train.Parch.value_counts()
Out[47]:
0    678
1    118
2     80
5      5
3      5
4      4
6      1
Name: Parch, dtype: int64
In [48]:
train.Parch.value_counts().sum()
Out[48]:
891
In [49]:
# count the unique values of a column and take the top 3
top3 = train.Parch.value_counts().nlargest(3)
print(top3)
0    678
1    118
2     80
Name: Parch, dtype: int64
In [50]:
# No. 63 - Narrowing down (filtering small categories using threshold)
In [51]:
frequencies = data["SibSp"].value_counts(normalize = True)
frequencies
Out[51]:
0    0.680672
1    0.243697
2    0.032086
4    0.016807
3    0.015279
8    0.006875
5    0.004584
Name: SibSp, dtype: float64
In [52]:
# find the categories below a given threshold
threshold = 0.015
small_categories = frequencies[frequencies < threshold].index
small_categories
Out[52]:
Int64Index([8, 5], dtype='int64')
In [53]:
frequencies[frequencies < threshold]
Out[53]:
8    0.006875
5    0.004584
Name: SibSp, dtype: float64
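Once the small categories are identified, a common follow-up is collapsing them into a single bucket. A sketch on synthetic data, where the `-1` bucket label is an arbitrary choice:

```python
import pandas as pd

s = pd.Series([0, 0, 0, 1, 1, 8, 5])
freq = s.value_counts(normalize=True)
rare = freq[freq < 0.2].index  # values below the threshold

# Series.where keeps values where the condition holds and
# replaces the rest, so all rare values become -1
collapsed = s.where(~s.isin(rare), other=-1)
print(collapsed.tolist())
```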
In [54]:
# extract only the group you want
a = data.groupby('Pclass').get_group(1)
a.head()
Out[54]:
Age | Cabin | Embarked | Fare | Name | Parch | PassengerId | Pclass | Sex | SibSp | Survived | Ticket | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 38.0 | C85 | C | 71.2833 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 0 | 2 | 1 | female | 1 | 1.0 | PC 17599 |
3 | 35.0 | C123 | S | 53.1000 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 0 | 4 | 1 | female | 1 | 1.0 | 113803 |
6 | 54.0 | E46 | S | 51.8625 | McCarthy, Mr. Timothy J | 0 | 7 | 1 | male | 0 | 0.0 | 17463 |
11 | 58.0 | C103 | S | 26.5500 | Bonnell, Miss. Elizabeth | 0 | 12 | 1 | female | 0 | 1.0 | 113783 |
23 | 28.0 | A6 | S | 35.5000 | Sloper, Mr. William Thompson | 0 | 24 | 1 | male | 0 | 1.0 | 113788 |
In [55]:
# No. 64 - Groupby - Count
In [56]:
# count : number of non-NA values in each group
train.groupby(['Pclass','Parch'])['Pclass'].count().to_frame()
Out[56]:
Pclass | ||
---|---|---|
Pclass | Parch | |
1 | 0 | 163 |
1 | 31 | |
2 | 21 | |
4 | 1 | |
2 | 0 | 134 |
1 | 32 | |
2 | 16 | |
3 | 2 | |
3 | 0 | 381 |
1 | 55 | |
2 | 43 | |
3 | 3 | |
4 | 3 | |
5 | 5 | |
6 | 1 |
In [57]:
# sum : sum of non-NA values in each group
train.groupby(['Pclass','Parch'])['Pclass'].sum()
Out[57]:
Pclass  Parch
1       0        163
        1         31
        2         21
        4          1
2       0        268
        1         64
        2         32
        3          4
3       0       1143
        1        165
        2        129
        3          9
        4          9
        5         15
        6          3
Name: Pclass, dtype: int64
In [58]:
# No. 65 - Groupby - Mean
In [59]:
train.groupby(['Pclass','Age'])['Survived'].mean()
Out[59]:
Pclass  Age
1       0.92     1.0
        2.00     0.0
        4.00     1.0
        11.00    1.0
        14.00    1.0
                ...
3       61.00    0.0
        63.00    1.0
        65.00    0.0
        70.50    0.0
        74.00    0.0
Name: Survived, Length: 182, dtype: float64
In [60]:
# No. 66 - Groupby - count - cmap
In [61]:
# turn it into a DataFrame : to_frame()
train.groupby(['Pclass','Survived'])['Pclass'].count().to_frame()
Out[61]:
Pclass | ||
---|---|---|
Pclass | Survived | |
1 | 0 | 80 |
1 | 136 | |
2 | 0 | 97 |
1 | 87 | |
3 | 0 | 372 |
1 | 119 |
In [62]:
# color the DataFrame : style.background_gradient(cmap='summer_r')
train.groupby(['Pclass','Survived'])['Pclass'].count().to_frame().style.background_gradient(cmap='summer_r')
Out[62]:
Pclass | ||
---|---|---|
Pclass | Survived | |
1 | 0 | 80 |
1 | 136 | |
2 | 0 | 97 |
1 | 87 | |
3 | 0 | 372 |
1 | 119 |
In [63]:
# No. 68 - Groupby - Crosstab
In [64]:
# crosstab : cross-tabulates the data by factors,
# counting frequencies along the row and column factors to build a frequency table (contingency table)
# source : https://rfriend.tistory.com/280
pd.crosstab(train.Survived, train.Pclass, margins=True)
Out[64]:
Pclass | 1 | 2 | 3 | All |
---|---|---|---|---|
Survived | ||||
0 | 80 | 97 | 372 | 549 |
1 | 136 | 87 | 119 | 342 |
All | 216 | 184 | 491 | 891 |
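`crosstab` can also return proportions instead of counts via its `normalize` parameter. A minimal sketch on a toy frame:

```python
import pandas as pd

df = pd.DataFrame({'Survived': [0, 1, 1, 0, 1, 0],
                   'Pclass':   [3, 1, 1, 3, 2, 2]})

# normalize='index' divides each row by its row total,
# turning counts into per-row proportions
rates = pd.crosstab(df.Survived, df.Pclass, normalize='index')
print(rates)
```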
In [65]:
# No. 69 - where
In [66]:
data['Agroup'] = 1
# assign values by age bracket
data.loc[(data.Age.values < 24.0), 'Agroup'] = 0
data.loc[(data.Age.values > 30.0), 'Agroup'] = 2
data.head()
Out[66]:
Age | Cabin | Embarked | Fare | Name | Parch | ... | Pclass | Sex | SibSp | Survived | Ticket | Agroup | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 22.0 | NaN | S | 7.2500 | Braund, Mr. Owen Harris | 0 | ... | 3 | male | 1 | 0.0 | A/5 21171 | 0 |
1 | 38.0 | C85 | C | 71.2833 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 0 | ... | 1 | female | 1 | 1.0 | PC 17599 | 2 |
2 | 26.0 | NaN | S | 7.9250 | Heikkinen, Miss. Laina | 0 | ... | 3 | female | 0 | 1.0 | STON/O2. 3101282 | 1 |
3 | 35.0 | C123 | S | 53.1000 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 0 | ... | 1 | female | 1 | 1.0 | 113803 | 2 |
4 | 35.0 | NaN | S | 8.0500 | Allen, Mr. William Henry | 0 | ... | 3 | male | 0 | 0.0 | 373450 | 2 |
5 rows × 13 columns
In [67]:
# No. 70 - Grouping with bins
In [68]:
# lift the column display limit
pd.set_option('display.max_columns', 100)
# cut : bin values into the given intervals
data['Age_Cuts'] = pd.cut(data.Age,
                          bins=[0, 1, 3, 10, 18, 65, 99],
                          labels=['Baby', 'Toddler', 'Kid', 'Teens', 'Adult', 'Elderly'])
data.head()
Out[68]:
Age | Cabin | Embarked | Fare | Name | Parch | PassengerId | Pclass | Sex | SibSp | Survived | Ticket | Agroup | Age_Cuts | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 22.0 | NaN | S | 7.2500 | Braund, Mr. Owen Harris | 0 | 1 | 3 | male | 1 | 0.0 | A/5 21171 | 0 | Adult |
1 | 38.0 | C85 | C | 71.2833 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 0 | 2 | 1 | female | 1 | 1.0 | PC 17599 | 2 | Adult |
2 | 26.0 | NaN | S | 7.9250 | Heikkinen, Miss. Laina | 0 | 3 | 3 | female | 0 | 1.0 | STON/O2. 3101282 | 1 | Adult |
3 | 35.0 | C123 | S | 53.1000 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 0 | 4 | 1 | female | 1 | 1.0 | 113803 | 2 | Adult |
4 | 35.0 | NaN | S | 8.0500 | Allen, Mr. William Henry | 0 | 5 | 3 | male | 0 | 0.0 | 373450 | 2 | Adult |
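The bin edges in `pd.cut` are right-closed by default, so a value exactly on an edge falls into the lower bin. A small sketch with the same edges (the ages here are made up):

```python
import pandas as pd

ages = pd.Series([1.0, 3.0, 18.0, 65.0])

# bins are right-closed intervals: (0, 1], (1, 3], (3, 10], (10, 18], ...
# so an age exactly on an edge lands in the lower bin
cuts = pd.cut(ages, bins=[0, 1, 3, 10, 18, 65, 99],
              labels=['Baby', 'Toddler', 'Kid', 'Teens', 'Adult', 'Elderly'])
print(cuts.tolist())
```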
Part 7 - Appending/Inserting¶
In [69]:
data.insert(3, 'New1', 0)
data.head()
Out[69]:
Age | Cabin | Embarked | New1 | Fare | Name | Parch | PassengerId | Pclass | Sex | SibSp | Survived | Ticket | Agroup | Age_Cuts | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 22.0 | NaN | S | 0 | 7.2500 | Braund, Mr. Owen Harris | 0 | 1 | 3 | male | 1 | 0.0 | A/5 21171 | 0 | Adult |
1 | 38.0 | C85 | C | 0 | 71.2833 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 0 | 2 | 1 | female | 1 | 1.0 | PC 17599 | 2 | Adult |
2 | 26.0 | NaN | S | 0 | 7.9250 | Heikkinen, Miss. Laina | 0 | 3 | 3 | female | 0 | 1.0 | STON/O2. 3101282 | 1 | Adult |
3 | 35.0 | C123 | S | 0 | 53.1000 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 0 | 4 | 1 | female | 1 | 1.0 | 113803 | 2 | Adult |
4 | 35.0 | NaN | S | 0 | 8.0500 | Allen, Mr. William Henry | 0 | 5 | 3 | male | 0 | 0.0 | 373450 | 2 | Adult |
Part 8 - Replacing¶
Part 9 - Others¶
In [70]:
# No. 82 - Drop columns by label names
In [71]:
# drop a specific column
data = data.drop('New1', axis=1)
data.head()
Out[71]:
Age | Cabin | Embarked | Fare | Name | Parch | PassengerId | Pclass | Sex | SibSp | Survived | Ticket | Agroup | Age_Cuts | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 22.0 | NaN | S | 7.2500 | Braund, Mr. Owen Harris | 0 | 1 | 3 | male | 1 | 0.0 | A/5 21171 | 0 | Adult |
1 | 38.0 | C85 | C | 71.2833 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 0 | 2 | 1 | female | 1 | 1.0 | PC 17599 | 2 | Adult |
2 | 26.0 | NaN | S | 7.9250 | Heikkinen, Miss. Laina | 0 | 3 | 3 | female | 0 | 1.0 | STON/O2. 3101282 | 1 | Adult |
3 | 35.0 | C123 | S | 53.1000 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 0 | 4 | 1 | female | 1 | 1.0 | 113803 | 2 | Adult |
4 | 35.0 | NaN | S | 8.0500 | Allen, Mr. William Henry | 0 | 5 | 3 | male | 0 | 0.0 | 373450 | 2 | Adult |
In [72]:
# No. 84 - Using the missingno library (built on matplotlib) to see missing values
In [73]:
# import and use libraries, packages, modules, and classes
In [74]:
missingno.matrix(data, figsize = (15,8))
Out[74]:
<AxesSubplot:>
In [75]:
# (Note) No. 85 - Correlation matrix
In [76]:
# fixed: np.bool -> bool
In [77]:
corr = data.corr()
corr
Out[77]:
Age | Fare | Parch | PassengerId | Pclass | SibSp | Survived | Agroup | |
---|---|---|---|---|---|---|---|---|
Age | 1.000000 | 0.178740 | -0.150917 | 0.028814 | -0.408106 | -0.243699 | -0.077221 | 0.840986 |
Fare | 0.178740 | 1.000000 | 0.221539 | 0.031428 | -0.558629 | 0.160238 | 0.257307 | 0.158370 |
Parch | -0.150917 | 0.221539 | 1.000000 | 0.008942 | 0.018322 | 0.373587 | 0.081629 | -0.067201 |
PassengerId | 0.028814 | 0.031428 | 0.008942 | 1.000000 | -0.038354 | -0.055224 | -0.005007 | 0.014800 |
Pclass | -0.408106 | -0.558629 | 0.018322 | -0.038354 | 1.000000 | 0.060832 | -0.338481 | -0.322642 |
SibSp | -0.243699 | 0.160238 | 0.373587 | -0.055224 | 0.060832 | 1.000000 | -0.035322 | -0.144085 |
Survived | -0.077221 | 0.257307 | 0.081629 | -0.005007 | -0.338481 | -0.035322 | 1.000000 | -0.003715 |
Agroup | 0.840986 | 0.158370 | -0.067201 | 0.014800 | -0.322642 | -0.144085 | -0.003715 | 1.000000 |
In [78]:
# set up the mask
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True
# draw the plot
plt.figure(figsize=(14, 8)) # figure setup
plt.title('Overall Correlation of Titanic Features', fontsize=25) # plot title
sns.heatmap(corr, mask=mask, annot=False, cmap='RdYlGn', linewidths=0.2, annot_kws={'size':20}) # fill in the heatmap
plt.show() # show the plot
In [79]:
# (Note) No. 86 - Displaying a pie chart and a countplot together using matplotlib
In [80]:
# fixed: countplot('Survived') -> countplot(x='Survived')
In [81]:
f,ax=plt.subplots(1,2,figsize=(15,6))
train['Survived'].value_counts().plot.pie(explode=[0,0.1],autopct='%1.1f%%',ax=ax[0],shadow=True)
ax[0].set_title('Survived')
ax[0].set_ylabel('')
sns.countplot(x='Survived',data=train,ax=ax[1])
ax[1].set_title('Survived')
plt.show()
Practice¶
Loading the Data¶
In [82]:
pwd
Out[82]:
'C:\\Users\\KJW\\Desktop\\python_basic'
In [83]:
import pandas as pd
talents_dir = 'C:\\Users\\KJW\\Desktop\\python_basic\\resources\\talents.CSV'
bonus_dir = 'C:\\Users\\KJW\\Desktop\\python_basic\\resources\\talents_bonus.CSV'
In [84]:
talents = pd.read_csv(talents_dir, encoding='euc-kr')
bonus = pd.read_csv(bonus_dir, encoding='euc-kr')
Exploring the Data¶
In [ ]:
print(talents.shape)
talents.head()
In [ ]:
print(bonus.shape)
bonus.head()
In [87]:
talents.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 637 entries, 0 to 636
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    637 non-null    object 
 1   Week    637 non-null    int64  
 2   Attend  637 non-null    int64  
 3   Month   637 non-null    int64  
 4   Quiz    10 non-null     float64
dtypes: float64(1), int64(3), object(1)
memory usage: 25.0+ KB
In [88]:
bonus.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    156 non-null    object
 1   Month   156 non-null    int64 
 2   Get     156 non-null    int64 
dtypes: int64(2), object(1)
memory usage: 3.8+ KB
Cleaning the Data¶
In [89]:
# some people took the quiz without attending
# there must be no NaN values before converting to int
# fill NaN values in the Quiz column with 0
# counting seemed to go wrong while the column was float,
# and converting to int made the counts come out right
talents['Quiz'] = talents['Quiz'].fillna(0).astype(int)
Building the Attendance Data¶
In [90]:
# extract from the first week of May onward
# store in talents1
talents1 = talents.loc[(talents['Attend'] == 1) & (talents['Month'] >= 5)]
In [91]:
# check how many times each person attended
talents1 = talents1.groupby(['Name', 'Attend'], as_index=False).size()
In [92]:
# each attendance counts as 400 won
# store it in a new price_x column
# talents1 is the attendance DataFrame
talents1['price_x'] = talents1['size'] * 400
Building the Quiz Data¶
In [93]:
# extract people who took the quiz
# store in talents2
talents2 = talents.loc[talents['Quiz'] == 1]
In [94]:
# check how many times each person took the quiz
talents2 = talents2.groupby(['Name', 'Quiz'], as_index=False).size()
In [95]:
# each quiz counts as 100 won
# store it in a new price_y column
# talents2 is the quiz DataFrame
talents2['price_y'] = talents2['size'] * 100
In [96]:
# merge talents1 and talents2
# the key is Name
# outer keeps the union of keys
# fill NaN values with 0
# store in talents3
talents3 = pd.merge(talents1, talents2, on='Name', how='outer').fillna(0)
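When debugging an outer merge like this, `indicator=True` shows which side each row came from. A sketch on toy frames (the names and values are made up; the column names mirror the ones used here):

```python
import pandas as pd

left = pd.DataFrame({'Name': ['a', 'b'], 'price_x': [400, 800]})
right = pd.DataFrame({'Name': ['b', 'c'], 'price_y': [100, 200]})

# indicator adds a _merge column: 'left_only', 'right_only', or 'both'
merged = pd.merge(left, right, on='Name', how='outer', indicator=True)
print(merged[['Name', '_merge']])
```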
Building the Bonus Data¶
In [97]:
# extract from the first week of May onward
# store in bonus1
bonus1 = bonus.loc[(bonus['Get'] == 1) & (bonus['Month'] >= 5)]
In [98]:
# check how many times each person received a bonus
bonus1 = bonus1.groupby(['Name', 'Get'], as_index=False).size()
In [99]:
# each bonus counts as 400 won
# store it in a new price_z column
# bonus1 is the bonus DataFrame
bonus1['price_z'] = bonus1['size'] * 400
Merging Attendance, Quiz, and Bonus¶
In [100]:
# merge talents3 and bonus1
# store in result
result = pd.merge(talents3, bonus1, on='Name', how='outer').fillna(0)
In [101]:
# add price_x, price_y, and price_z and store the sum in a total column
# convert to int for readability
result['total'] = (result['price_x'] + result['price_y'] + result['price_z']).astype(int)
In [ ]:
# check what result looks like
result
In [ ]:
# rename the columns so they are easier to understand
result = result.rename(columns= {'Get':'Bonus', 'size' : 'size_z'})
result
Saving to CSV¶
In [104]:
# save as a csv file
result_dir = 'C:\\Users\\KJW\\Desktop\\python_basic\\resources\\result.CSV'
result.to_csv(result_dir, sep=',', index=False, encoding='euc-kr')
In [105]:
# print the total amount
print('Total amount:', result['total'].sum(), 'won', end='')
Total amount: 70200 won
Numpy Cheat Sheet¶
Numpy arrays¶
- 1D array : axis0
- 2D array : axis0, axis1
- 3D array : axis0, axis1, axis2
In [106]:
import numpy as np
Creating arrays¶
In [107]:
a = np.array([1,2,3])
a
Out[107]:
array([1, 2, 3])
In [108]:
b = np.array([(1.5,2, 3), (4, 5, 6)], dtype=float)
b
Out[108]:
array([[1.5, 2. , 3. ], [4. , 5. , 6. ]])
In [109]:
c = np.array([ [(1.5, 2, 3), (4,5,6)], [(3,2,1), (4,5,6)] ], dtype=float)
c
Out[109]:
array([[[1.5, 2. , 3. ],
        [4. , 5. , 6. ]],

       [[3. , 2. , 1. ],
        [4. , 5. , 6. ]]])
Initial Placeholders¶
In [110]:
np.zeros((3, 4))
Out[110]:
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])
In [111]:
np.ones((2, 3, 4), dtype=np.int16)
Out[111]:
array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]], dtype=int16)
In [112]:
d = np.arange(10, 26, 5)
d
Out[112]:
array([10, 15, 20, 25])
In [113]:
np.arange(30)
Out[113]:
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])
In [114]:
np.arange(30).reshape(5,6)
Out[114]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])
In [115]:
# -1 means "infer this dimension automatically"
np.arange(30).reshape(5, -1)
Out[115]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])
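The inferred `-1` dimension only works when the known dimension divides the array's size. A quick sketch:

```python
import numpy as np

a = np.arange(30)

# 30 / 5 = 6, so NumPy infers the second dimension as 6
print(a.reshape(5, -1).shape)

# a non-divisor raises ValueError instead of guessing
try:
    a.reshape(4, -1)
except ValueError:
    print('4 does not divide 30')
```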
In [116]:
np.arange(30).reshape(3, -1)
Out[116]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])
In [117]:
np.arange(30).reshape(10, -1)
Out[117]:
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23],
       [24, 25, 26],
       [27, 28, 29]])
In [118]:
# divide the range 0 to 2 into 9 evenly spaced points
np.linspace(0, 2, 9)
Out[118]:
array([0. , 0.25, 0.5 , 0.75, 1. , 1.25, 1.5 , 1.75, 2. ])
In [119]:
e = np.full((2, 2), 7)
e
Out[119]:
array([[7, 7], [7, 7]])
In [120]:
# 2x2 identity matrix
f = np.eye(2)
f
Out[120]:
array([[1., 0.], [0., 1.]])
In [121]:
np.eye(3)
Out[121]:
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
In [122]:
np.eye(4)
Out[122]:
array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])
In [123]:
np.random.random((2, 2))
Out[123]:
array([[0.77671604, 0.11718419], [0.2930968 , 0.3393411 ]])
In [124]:
a1 = np.random.rand(5)
print(a1, '\n')
b1 = np.random.rand(2, 3)
print(b1)
[0.29744718 0.88303656 0.70272707 0.79426531 0.64414059] 

[[0.7735932  0.99829023 0.81266835]
 [0.21791089 0.19078023 0.14518325]]
In [125]:
a1 = np.random.randint(2, size=5)
print(a1, '\n')
b1 = np.random.randint(2, 4, size=5)
print(b1, '\n')
c1 = np.random.randint(1, 5, size=(2, 3))
print(c1)
[0 1 0 1 1] 

[2 2 3 3 2] 

[[3 4 2]
 [4 3 1]]
In [126]:
a1 = np.random.randn(5)
print(a1, '\n')
b1 = np.random.randn(2, 3)
print(b1, '\n')
sigma, mu = 1.5, 2.0
c1 = sigma * np.random.randn(5) + mu
print(c1)
[-1.54384929 -0.36207741  0.26291147  0.04342712 -1.14849666] 

[[ 0.200511   -0.0512603   1.037473  ]
 [ 0.07203835 -0.84998263  0.41155014]] 

[4.70029769 1.87158746 0.27014015 1.92486193 2.96782733]
In [127]:
# Fix the seed so the "random" values are the same on every run
np.random.seed(0)
print(np.random.rand(2, 3))
[[0.5488135 0.71518937 0.60276338] [0.54488318 0.4236548 0.64589411]]
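As a sketch of what seeding does, re-seeding before each draw reproduces the exact same pseudo-random sequence:

```python
import numpy as np

np.random.seed(0)
first = np.random.rand(2, 3)
np.random.seed(0)          # same seed -> same pseudo-random stream
second = np.random.rand(2, 3)
assert np.array_equal(first, second)  # identical draws after re-seeding
```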
Inspecting your array¶
In [128]:
print(b, '\n')
print(b.ndim)
[[1.5 2. 3. ] [4. 5. 6. ]] 2
In [129]:
print(e, '\n')
print(e.size)
[[7 7] [7 7]] 4
In [130]:
b.dtype
Out[130]:
dtype('float64')
In [131]:
b.dtype.name
Out[131]:
'float64'
In [132]:
b.astype(int)
Out[132]:
array([[1, 2, 3], [4, 5, 6]])
Array Mathematics¶
Arithmetic Operations¶
In [133]:
g = a-b
g
Out[133]:
array([[-0.5, 0. , 0. ], [-3. , -3. , -3. ]])
In [134]:
np.subtract(a,b)
Out[134]:
array([[-0.5, 0. , 0. ], [-3. , -3. , -3. ]])
In [135]:
b + a
Out[135]:
array([[2.5, 4. , 6. ], [5. , 7. , 9. ]])
In [136]:
np.add(a, b)
Out[136]:
array([[2.5, 4. , 6. ], [5. , 7. , 9. ]])
In [137]:
a/b
Out[137]:
array([[0.66666667, 1. , 1. ], [0.25 , 0.4 , 0.5 ]])
In [138]:
a*b
Out[138]:
array([[ 1.5, 4. , 9. ], [ 4. , 10. , 18. ]])
In [139]:
np.multiply(a,b)
Out[139]:
array([[ 1.5, 4. , 9. ], [ 4. , 10. , 18. ]])
In [140]:
# Exponential function with base e (Euler's number, ≈ 2.718)
np.exp(b)
Out[140]:
array([[ 4.48168907, 7.3890561 , 20.08553692], [ 54.59815003, 148.4131591 , 403.42879349]])
In [141]:
# Square root
np.sqrt(b)
Out[141]:
array([[1.22474487, 1.41421356, 1.73205081], [2. , 2.23606798, 2.44948974]])
In [142]:
np.sin(a)
Out[142]:
array([0.84147098, 0.90929743, 0.14112001])
In [143]:
np.cos(a)
Out[143]:
array([ 0.54030231, -0.41614684, -0.9899925 ])
In [144]:
np.log(a)
Out[144]:
array([0. , 0.69314718, 1.09861229])
In [145]:
# Matrix product: take the dot product of e and f
print(e, '\n')
print(f, '\n')
print(e.dot(f))
[[7 7] [7 7]] [[1. 0.] [0. 1.]] [[7. 7.] [7. 7.]]
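The same product can also be written with the `@` operator, which is equivalent to `.dot` for 2-D arrays; multiplying by the identity leaves `e` unchanged:

```python
import numpy as np

e = np.full((2, 2), 7)
f = np.eye(2)
# @ and .dot agree for 2-D matrix multiplication
assert np.array_equal(e @ f, e.dot(f))
# the identity matrix acts as the multiplicative identity
assert np.array_equal(e @ f, e)
```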
Comparison¶
In [146]:
a == b
Out[146]:
array([[False, True, True], [False, False, False]])
In [147]:
print(a, '\n')
print(a < 2)
[1 2 3] [ True False False]
In [148]:
np.array_equal(a, b)
Out[148]:
False
Aggregate Functions¶
In [149]:
# Maximum along axis 0 (max of each column)
print(b)
print(b.max(axis=0))
[[1.5 2. 3. ] [4. 5. 6. ]] [4. 5. 6.]
In [150]:
# Cumulative sum along axis 1 (across each row)
b.cumsum(axis=1)
Out[150]:
array([[ 1.5, 3.5, 6.5], [ 4. , 9. , 15. ]])
In [151]:
np.std(b)
Out[151]:
1.5920810978785667
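`axis=0` collapses the rows (one result per column) while `axis=1` collapses the columns (one result per row), which is easy to confuse; a small check:

```python
import numpy as np

b = np.array([[1.5, 2.0, 3.0], [4.0, 5.0, 6.0]])
# axis=0: reduce down the rows -> one max per column
assert np.array_equal(b.max(axis=0), np.array([4.0, 5.0, 6.0]))
# axis=1: reduce across the columns -> one max per row
assert np.array_equal(b.max(axis=1), np.array([3.0, 6.0]))
```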
Copying Arrays¶
In [152]:
b = np.array([4,5,6])
In [153]:
v = a.view()
v
Out[153]:
array([1, 2, 3])
In [154]:
h = a.copy()
h
Out[154]:
array([1, 2, 3])
In [155]:
a = a+b
a
Out[155]:
array([5, 7, 9])
In [156]:
h
Out[156]:
array([1, 2, 3])
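The view/copy difference only shows up under in-place modification: a `view` shares the original buffer, a `copy` does not (note that `a = a + b` above rebinds `a` to a brand-new array, so neither `v` nor `h` is affected there). A minimal sketch:

```python
import numpy as np

a = np.array([1, 2, 3])
v = a.view()   # shares memory with a
h = a.copy()   # owns its own data
a[0] = 99      # in-place change to a
assert v[0] == 99  # the view reflects it
assert h[0] == 1   # the copy does not
```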
Sorting Arrays¶
In [157]:
a.sort()
a
Out[157]:
array([5, 7, 9])
In [158]:
print(c, '\n')
c.sort(axis=0)
print(c, '\n')
[[[1.5 2. 3. ] [4. 5. 6. ]] [[3. 2. 1. ] [4. 5. 6. ]]] [[[1.5 2. 1. ] [4. 5. 6. ]] [[3. 2. 3. ] [4. 5. 6. ]]]
Subsetting, Slicing¶
Subsetting¶
Slicing¶
In [172]:
b = np.array([(1.5,2, 3), (4, 5, 6)], dtype=float)
print(b, '\n')
print(b[0:2, 1])
[[1.5 2. 3. ] [4. 5. 6. ]] [2. 5.]
In [173]:
c = np.array([[(1.5, 2, 3), (4,5,6)], [(3,2,1), (4,5,6)]], dtype=float)
print(c, '\n')
print(c[1, :])
[[[1.5 2. 3. ] [4. 5. 6. ]] [[3. 2. 1. ] [4. 5. 6. ]]] [[3. 2. 1.] [4. 5. 6.]]
In [174]:
# Reverse the order
a[::-1]
Out[174]:
array([9, 7, 5])
Boolean indexing¶
In [175]:
a[a<6]
Out[175]:
array([5])
Fancy indexing¶
In [177]:
# Select elements (1,0),(0,1),(1,2) and (0,0)
print(b, '\n')
b[[1,0,1,0], [0,1,2,0]]
[[1.5 2. 3. ] [4. 5. 6. ]]
Out[177]:
array([4. , 2. , 6. , 1.5])
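Unlike slicing, fancy indexing always returns a copy; the row and column index arrays are paired element-wise:

```python
import numpy as np

b = np.array([[1.5, 2.0, 3.0], [4.0, 5.0, 6.0]])
picked = b[[1, 0, 1, 0], [0, 1, 2, 0]]  # (1,0), (0,1), (1,2), (0,0)
assert np.array_equal(picked, np.array([4.0, 2.0, 6.0, 1.5]))
picked[0] = 0.0                          # modifying the result...
assert b[1, 0] == 4.0                    # ...leaves b untouched (it's a copy)
```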
Array Manipulation¶
Transposing Array¶
In [160]:
print(b, '\n')
# Transpose the array (a 1-D array is unchanged by transpose)
i = np.transpose(b)
i
[4 5 6]
Out[160]:
array([4, 5, 6])
In [161]:
i.T
Out[161]:
array([4, 5, 6])
Changing Array Shape¶
In [162]:
print(b, '\n')
# flatten the array
b.ravel()
[4 5 6]
Out[162]:
array([4, 5, 6])
In [163]:
print(g, '\n')
g.reshape(3, 2)
[[-0.5 0. 0. ] [-3. -3. -3. ]]
Out[163]:
array([[-0.5, 0. ], [ 0. , -3. ], [-3. , -3. ]])
Adding/Removing Elements¶
In [164]:
print(h, '\n')
print(g, '\n')
np.append(h, g)
[1 2 3] [[-0.5 0. 0. ] [-3. -3. -3. ]]
Out[164]:
array([ 1. , 2. , 3. , -0.5, 0. , 0. , -3. , -3. , -3. ])
In [165]:
print(a, '\n')
print(np.insert(a, 1, 5), '\n')
np.delete(a, [2])
[5 7 9] [5 5 7 9]
Out[165]:
array([5, 7])
Combining Arrays¶
In [166]:
print(d, '\n')
np.concatenate((a, d), axis=0)
[10 15 20 25]
Out[166]:
array([ 5, 7, 9, 10, 15, 20, 25])
In [167]:
print(a, '\n')
print(b, '\n')
# v : vertical, h : horizontal
np.vstack((a, b))
[5 7 9] [4 5 6]
Out[167]:
array([[5, 7, 9], [4, 5, 6]])
In [168]:
print(e, '\n')
print(f, '\n')
np.hstack((e, f))
[[7 7] [7 7]] [[1. 0.] [0. 1.]]
Out[168]:
array([[7., 7., 1., 0.], [7., 7., 0., 1.]])
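`vstack` stacks along axis 0 and `hstack` along axis 1, so the non-stacked dimensions must match; a sketch of the resulting shapes:

```python
import numpy as np

a = np.array([5, 7, 9])
b = np.array([4, 5, 6])
assert np.vstack((a, b)).shape == (2, 3)  # two rows of length 3

e = np.full((2, 2), 7)
f = np.eye(2)
assert np.hstack((e, f)).shape == (2, 4)  # columns joined side by side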
Splitting Arrays¶
In [169]:
print(a, '\n')
np.hsplit(a, 3)
[5 7 9]
Out[169]:
[array([5]), array([7]), array([9])]
In [170]:
print(c, '\n')
np.vsplit(c, 2)
[[[1.5 2. 1. ] [4. 5. 6. ]] [[3. 2. 3. ] [4. 5. 6. ]]]
Out[170]:
[array([[[1.5, 2. , 1. ], [4. , 5. , 6. ]]]), array([[[3., 2., 3.], [4., 5., 6.]]])]
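`hsplit` and `vsplit` require the split count to divide the axis length evenly, otherwise they raise an error; a sketch with the shapes involved:

```python
import numpy as np

a = np.array([5, 7, 9])
parts = np.hsplit(a, 3)          # 3 divides len(a) == 3 evenly
assert [p[0] for p in parts] == [5, 7, 9]

c = np.arange(8).reshape(2, 2, 2)
halves = np.vsplit(c, 2)         # split along axis 0
assert halves[0].shape == (1, 2, 2)
```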