[Machine Learning] MNIST, SVM
1 minute read
MNIST(숫자 손글씨) 분석 예제
import urllib.request as req
import gzip, os, os.path
# Local directory for the downloaded archives, the remote base URL, and the
# four MNIST archive files (training/test images and labels) to fetch.
# NOTE(review): the archives are fetched over plain HTTP with no checksum
# verification -- confirm the host is still reachable and trusted.
savepath = "./mnist"
baseurl = "http://yann.lecun.com/exdb/mnist/"
files = [
    "train-images-idx3-ubyte.gz",
    "train-labels-idx1-ubyte.gz",
    "t10k-images-idx3-ubyte.gz",
    "t10k-labels-idx1-ubyte.gz",
]

# makedirs(exist_ok=True) creates missing parent directories too and does not
# fail when the directory already exists.
os.makedirs(savepath, exist_ok=True)

for f in files:
    url = baseurl + f
    loc = savepath + "/" + f
    # Archives that are already present are not fetched again.
    if os.path.exists(loc):
        continue
    print("download: ", url)
    # Download under a temporary name and rename only on success, so that an
    # interrupted transfer never leaves a truncated file at `loc` (which the
    # existence check above would otherwise accept on the next run).
    partial = loc + ".part"
    try:
        req.urlretrieve(url, partial)
        os.replace(partial, loc)
    finally:
        # After a successful rename the temporary file no longer exists; this
        # only removes the leftover of a failed download.
        if os.path.exists(partial):
            os.remove(partial)
# Decompress every downloaded .gz archive next to the original file.
for f in files:
    gz_file = "{0}/{1}".format(savepath, f)
    raw_file = "{0}/{1}".format(savepath, f.replace(".gz", ""))
    print("gzip:", f)
    # Read the whole decompressed payload first ...
    with gzip.open(gz_file, "rb") as fp:
        body = fp.read()
    # ... then write it out under the name without the ".gz" part.
    with open(raw_file, "wb") as w:
        w.write(body)
print("complete")
- 데이터베이스가 바이너리 형식으로 되어 있어 그대로는 분석하기 어렵습니다.
- 따라서 분석하기 쉽도록 CSV로 변환해 줍니다.
SVM
import random
# Compute the BMI and return the matching label.
def calc_bmi(h, w):
    """Return the label "thin", "normal" or "fat" for a height and weight.

    The index is computed as ``w / (h / 100) ** 2`` (the division by 100
    presumably converts centimetres to metres -- confirm with the caller).

    Args:
        h: Height value; must be non-zero, otherwise the division raises
            ZeroDivisionError.
        w: Weight value.

    Returns:
        "thin" when the index is below 18.5, "normal" when it is below 25,
        and "fat" otherwise.
    """
    scaled_height = h / 100
    index = w / scaled_height ** 2
    if index < 18.5:
        return "thin"
    return "normal" if index < 25 else "fat"
# Generate 20000 random height/weight samples, label each one with
# calc_bmi(), and save them to bmi.csv together with a header row.
cnt = {"thin": 0, "normal": 0, "fat": 0}  # number of samples per label
# The with-block closes (and flushes) the file even when calc_bmi() or a
# write raises part-way through the loop.
with open("bmi.csv", "w", encoding="utf-8") as fp:
    fp.write("height,weight,label\n")
    for i in range(20000):
        h = random.randint(120, 200)
        w = random.randint(35, 80)
        label = calc_bmi(h, w)
        cnt[label] += 1
        fp.write("{0},{1},{2}\n".format(h, w, label))
print("ok", cnt)
from sklearn import svm, metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
# Read the height and weight data.
tbl = pd.read_csv("bmi.csv")


def _min_max_scale(column):
    """Return the column rescaled as (value - min) / (max - min)."""
    lowest = column.min()
    highest = column.max()
    return (column - lowest) / (highest - lowest)


# Split out the columns and normalize the two numeric ones.
label = tbl["label"]
w = _min_max_scale(tbl["weight"])
h = _min_max_scale(tbl["height"])
# Feature table with the scaled weight first and the scaled height second.
wh = pd.concat([w, h], axis=1)
Leave a comment