DigitRecognizer

来源:互联网 发布:webmin nginx 编辑:程序博客网 时间:2024/05/22 15:27
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd

# Load the training and test CSVs. As the head() outputs below show, the
# training frame has a `label` column followed by pixel0..pixel783, while the
# test frame has only the pixel columns.
dataset = pd.read_csv('input/train.csv')
test = pd.read_csv('input/test.csv')
dataset.describe()
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 … pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783 count 42000.000000 42000 42000 42000 42000 42000 42000 42000 42000 42000 … 42000.000000 42000.000000 42000.000000 42000.00000 42000.000000 42000.000000 42000 42000 42000 42000 mean 4.456643 0 0 0 0 0 0 0 0 0 … 0.219286 0.117095 0.059024 0.02019 0.017238 0.002857 0 0 0 0 std 2.887730 0 0 0 0 0 0 0 0 0 … 6.312890 4.633819 3.274488 1.75987 1.894498 0.414264 0 0 0 0 min 0.000000 0 0 0 0 0 0 0 0 0 … 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0 0 0 0 25% 2.000000 0 0 0 0 0 0 0 0 0 … 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0 0 0 0 50% 4.000000 0 0 0 0 0 0 0 0 0 … 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0 0 0 0 75% 7.000000 0 0 0 0 0 0 0 0 0 … 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0 0 0 0 max 9.000000 0 0 0 0 0 0 0 0 0 … 254.000000 254.000000 253.000000 253.00000 254.000000 62.000000 0 0 0 0

8 rows × 785 columns

dataset.head()
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 … pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783 0 1 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 2 1 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 3 4 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0

5 rows × 785 columns

test.head()
pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 pixel9 … pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783 0 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0

5 rows × 784 columns

# Select the first column (`label`) by position, keeping a 2-D (n, 1) array.
# `dataset[[0]]` asks for a column *named* 0; the columns here are strings
# (label, pixel0, ...), so that lookup raises KeyError on current pandas.
target = dataset.iloc[:, [0]].values
target
array([[1], [0], [1], …, [7], [6], [9]])
# Flatten the (n, 1) label column into a 1-D vector of labels.
target=target.ravel()
target
array([1, 0, 1, …, 7, 6, 9])
# Every column after the first (i.e. the pixel columns) becomes the feature
# matrix, as a plain NumPy array.
train=dataset.iloc[:,1:].values
train
array([[0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], …, [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0]])
File “”, line 1 print train.dtype ^ SyntaxError: Missing parentheses in call to ‘print’
test
pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 pixel9 … pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783 0 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 10 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 13 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 15 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 16 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 17 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 18 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 19 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 20 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 21 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 22 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 23 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 24 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 25 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 26 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 28 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 29 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 … … … … … … … … … … … … … … … … … … … … … … 27970 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27971 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27972 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27973 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27974 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27975 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27976 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27977 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27978 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27979 0 0 0 0 0 0 0 0 
0 0 … 0 0 0 0 0 0 0 0 0 0 27980 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27981 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27982 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27983 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27984 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27985 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27986 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27987 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27988 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27989 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27990 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27991 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27992 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27993 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27994 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27995 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27996 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27997 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27998 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0 27999 0 0 0 0 0 0 0 0 0 0 … 0 0 0 0 0 0 0 0 0 0

28000 rows × 784 columns

# Replace the test DataFrame with its underlying NumPy array.
test=test.values
test
array([[0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], …, [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0]])
# Create and train the random forest (100 trees) on the pixel features.
# These statements must sit on their own lines: fused onto the comment line
# they are never executed and `rf` is undefined when predict() is called.
rf = RandomForestClassifier(n_estimators=100)
rf.fit(train, target)
RandomForestClassifier(bootstrap=True, class_weight=None, criterion=’gini’, max_depth=None, max_features=’auto’, max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1, oob_score=False, random_state=None, verbose=0, warm_start=False)
# Predict one label per row of the test array with the fitted forest.
pred=rf.predict(test)
pred
array([2, 0, 9, …, 3, 9, 2])
# Load the sample submission file. In the visible code it is only displayed
# (columns ImageId, Label) and is not used to build the output.
subdemo=pd.read_csv('input/sample_submission.csv')
subdemo
ImageId Label 0 1 0 1 2 0 2 3 0 3 4 0 4 5 0 5 6 0 6 7 0 7 8 0 8 9 0 9 10 0 10 11 0 11 12 0 12 13 0 13 14 0 14 15 0 15 16 0 16 17 0 17 18 0 18 19 0 19 20 0 20 21 0 21 22 0 22 23 0 23 24 0 24 25 0 25 26 0 26 27 0 27 28 0 28 29 0 29 30 0 … … … 27970 27971 0 27971 27972 0 27972 27973 0 27973 27974 0 27974 27975 0 27975 27976 0 27976 27977 0 27977 27978 0 27978 27979 0 27979 27980 0 27980 27981 0 27981 27982 0 27982 27983 0 27983 27984 0 27984 27985 0 27985 27986 0 27986 27987 0 27987 27988 0 27988 27989 0 27989 27990 0 27990 27991 0 27991 27992 0 27992 27993 0 27993 27994 0 27994 27995 0 27995 27996 0 27996 27997 0 27997 27998 0 27998 27999 0 27999 28000 0

28000 rows × 2 columns

len(test)
28000
len(pred)
28000
# Write the submission: one "ImageId,Label" row per test sample, with
# 1-based image ids paired with the predicted labels.
image_ids = np.arange(1, len(test) + 1)
submission = np.column_stack((image_ids, pred))
np.savetxt(
    'submission_rand_forest.csv',
    submission,
    fmt='%d',
    delimiter=',',
    header='ImageId,Label',
    comments='',
)
1 0