Automatika.elfak.ni.ac.rs



# Titanic - exploratory analysis, feature engineering and model training.
#
# Feature     | Explanation                                     | Data type
# ------------+-------------------------------------------------+---------------------------
# PassengerId | Row number in the CSV list                      | Numeric, discrete
# Survived    | Whether the person survived: 0 = No, 1 = Yes    | Categorical (2), numeric (integer)
# Pclass      | Passenger class: 1 = first, 2 = second,         | Ordinal (like categorical, but
#             | 3 = third                                       | the values can be sorted)
# Name        | Name of the person                              | Alphanumeric, string
# Sex         | Sex of the person: male, female                 | Categorical (2), string
# Age         | Age in years                                    | Numeric, continuous (infants under
#             |                                                 | one year are given as a decimal)
# SibSp       | Total number of spouses and siblings aboard     | Numeric, discrete
# Parch       | Total number of parents and children aboard     | Numeric, discrete
# Ticket      | Ticket number                                   | Alphanumeric, string
# Fare        | Ticket price                                    | Numeric, continuous
# Cabin       | Cabin number                                    | Alphanumeric, string
# Embarked    | Port of embarkation: C = Cherbourg,             | Categorical (3 values), string
#             | Q = Queenstown, S = Southampton                 |

# Data analysis
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier  # Random forest
from sklearn.linear_model import LogisticRegression  # Logistic regression
from sklearn.metrics import accuracy_score  # model accuracy measurement
from sklearn.model_selection import train_test_split  # train/test split
from sklearn.neighbors import KNeighborsClassifier  # K nearest neighbours
from sklearn.svm import SVC  # Support vector machine
from sklearn.tree import DecisionTreeClassifier  # Decision tree

# Load the data
putnici = pd.read_csv('titanik.csv')

# ---------------------------------------------------------------------------
# 3.1 Preliminary data overview
# ---------------------------------------------------------------------------
print(putnici.columns)
print(putnici.head())

# Widen the console output so that all columns are printed.
pd.set_option('display.width', 300)
pd.set_option('display.max_columns', 15)
print(putnici.head())
print(putnici.head(15))
print(putnici.tail())
print(putnici.sample(10))
# NOTE(review): the extracted source read `print(())` here; presumably the
# original call was `putnici.info()` and the text extraction stripped it as a
# URL-like token - confirm against the original material.
putnici.info()
print(putnici.isnull().sum())
print(putnici.describe())  # numeric columns
print(putnici.describe(include=['O']))  # string (object) columns
print(putnici['Survived'].value_counts())
print(putnici['Parch'].value_counts())
print(putnici[putnici['Age'] > 65])

# Visualisation
# x=/y= are passed by keyword: newer seaborn releases no longer accept them
# positionally, while the keyword form works on old and new versions alike.
sns.countplot(x='Pclass', data=putnici)
sns.countplot(x='Embarked', data=putnici)
putnici.hist('Age')
putnici.hist('Age', bins=range(0, 81, 1))
putnici.hist('Fare', bins=range(0, 515, 5))
plt.show()

# Survival rate per value of each candidate feature.
print(putnici[['Sex', 'Survived']].groupby(['Sex']).mean())
print(putnici[['Pclass', 'Survived']].groupby(['Pclass']).mean())
print(putnici[['Embarked', 'Survived']].groupby(['Embarked']).mean())
print(putnici[['SibSp', 'Survived']].groupby(['SibSp']).mean())
print(putnici[['SibSp', 'Survived']].groupby(['SibSp']).mean()
      .sort_values(by='Survived', ascending=False))
print(putnici[['Parch', 'Survived']].groupby(['Parch']).mean()
      .sort_values(by='Survived', ascending=False))

sns.barplot(x='Sex', y='Survived', data=putnici)
plt.show()

# 2x2 grid of survival-rate bar plots.
for pozicija, osobina in ((221, 'Pclass'), (222, 'Embarked'),
                          (223, 'SibSp'), (224, 'Parch')):
    plt.subplot(pozicija)
    sns.barplot(x=osobina, y='Survived', data=putnici)
plt.show()

print(pd.crosstab(putnici['Embarked'], putnici['Pclass']))

# Age distribution of survivors (red) versus casualties (blue).
preziveli = putnici[putnici['Survived'] == 1]
poginuli = putnici[putnici['Survived'] == 0]
sns.distplot(preziveli['Age'].dropna().values, bins=range(0, 81, 1),
             color='red')
sns.distplot(poginuli['Age'].dropna().values, bins=range(0, 81, 1),
             color='blue', axlabel='Starost')
plt.show()

# ---------------------------------------------------------------------------
# 3.2 Examining relationships between features
# ---------------------------------------------------------------------------
# Only numeric columns can be correlated; the frame still holds string columns
# at this point, and recent pandas raises instead of silently skipping them.
sns.heatmap(putnici.select_dtypes(include='number').corr(), annot=True)
plt.show()

# Each comparison is parenthesised: `&` binds tighter than `==`, so the
# unparenthesised form `a == 1 & (b)` is evaluated as `a == (1 & b)` and
# selects the wrong rows.
prez_mus = putnici[(putnici['Survived'] == 1) & (putnici['Sex'] == 'male')]
pog_mus = putnici[(putnici['Survived'] == 0) & (putnici['Sex'] == 'male')]
prez_zene = putnici[(putnici['Survived'] == 1) & (putnici['Sex'] == 'female')]
pog_zene = putnici[(putnici['Survived'] == 0) & (putnici['Sex'] == 'female')]

plt.subplot(121)
sns.distplot(prez_mus['Age'].dropna().values, bins=range(0, 81, 1),
             kde=False, color='red')
sns.distplot(pog_mus['Age'].dropna().values, bins=range(0, 81, 1),
             kde=False, color='blue', axlabel='Muskarci - Starost')
plt.subplot(122)
sns.distplot(prez_zene['Age'].dropna().values, bins=range(0, 81, 1),
             kde=False, color='red')
sns.distplot(pog_zene['Age'].dropna().values, bins=range(0, 81, 1),
             kde=False, color='blue', axlabel='Zene - Starost')
plt.show()

sns.violinplot(x='Pclass', y='Age', hue='Survived', data=putnici, split=True)
plt.show()

# `factorplot` was renamed to `catplot`; kind='point' keeps the point plot
# that `factorplot` drew by default.
sns.catplot(x='Pclass', y='Survived', hue='Sex', col='Embarked',
            kind='point', data=putnici)
plt.show()

sns.barplot(x='Embarked', y='Survived', hue='Pclass', data=putnici)
plt.show()

# ---------------------------------------------------------------------------
# 4.1 Dropping useless features and creating new ones
# ---------------------------------------------------------------------------
putnici = putnici.drop(['PassengerId', 'Ticket', 'Cabin'], axis=1)
print(putnici.columns)

# The title is the text between ", " and the first "." of the name.
putnici['Title'] = (putnici['Name']
                    .str.split(", ", expand=True)[1]
                    .str.split(".", expand=True)[0])
print(pd.crosstab(putnici['Title'], putnici['Sex']))
putnici = putnici.drop(['Name'], axis=1)
print(putnici.columns)

# Collapse the listed titles into 'Rare' and merge spelling variants.
putnici['Title'] = putnici['Title'].replace(
    ['Lady', 'the Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev',
     'Sir', 'Jonkheer', 'Dona'],
    'Rare')
putnici['Title'] = putnici['Title'].replace(['Mlle', 'Ms'], 'Miss')
putnici['Title'] = putnici['Title'].replace('Mme', 'Mrs')
print(putnici[['Title', 'Survived']].groupby(['Title']).mean())
print(putnici[['Title', 'Age']].groupby(['Title']).mean())
print(pd.crosstab(putnici['Title'], putnici['Sex']))

# Family size counts the passenger too, hence the + 1.
putnici['FamilySize'] = putnici['SibSp'] + putnici['Parch'] + 1
print(putnici[['FamilySize', 'Survived']].groupby(['FamilySize']).mean()
      .sort_values(by='Survived', ascending=False))

# Alone = 1 when the passenger has no family aboard, otherwise 0.
putnici['Alone'] = (putnici['FamilySize'] == 1).astype(int)
print(putnici[['Alone', 'Survived']].groupby(['Alone']).mean())

putnici = putnici.drop(['SibSp', 'Parch'], axis=1)
print(putnici.columns)

# ---------------------------------------------------------------------------
# 4.2 Filling in missing data and converting features to appropriate types
# ---------------------------------------------------------------------------
print(putnici[putnici['Embarked'].isnull()])
putnici['Embarked'] = putnici['Embarked'].fillna('S')

print(putnici['Age'].mean())
print(putnici[['Title', 'Age']].groupby(['Title']).mean())

# Fill each missing age with a fixed value chosen per title.
STAROST_PO_TITULI = {'Master': 5, 'Miss': 22, 'Mr': 32, 'Mrs': 36, 'Rare': 46}
for titula, starost in STAROST_PO_TITULI.items():
    nedostaje = putnici['Age'].isnull() & (putnici['Title'] == titula)
    putnici.loc[nedostaje, 'Age'] = starost

# Encode the categorical string features as integers.
putnici['Sex'] = putnici['Sex'].map({'male': 0, 'female': 1})
putnici['Embarked'] = putnici['Embarked'].map({"S": 0, "C": 1, "Q": 2})
putnici['Title'] = putnici['Title'].map(
    {'Mr': 1, 'Miss': 2, 'Mrs': 3, 'Master': 4, 'Rare': 5})
print(putnici.head(25))

# Bin Fare into four classes 0..3; the quartile print shows the bin edges.
# Intervals are right-inclusive, i.e. (-inf, 7.91], (7.91, 14.454], ...
print(pd.qcut(putnici['Fare'], 4))
putnici['Fare'] = pd.cut(
    putnici['Fare'],
    bins=[float('-inf'), 7.91, 14.454, 31, float('inf')],
    labels=False,
).astype(int)

# Bin Age into five classes 0..4 with right-inclusive 16-year-wide intervals.
print(putnici[putnici['Age'] == putnici['Age'].max()])
putnici['Age'] = pd.cut(
    putnici['Age'],
    bins=[float('-inf'), 16, 32, 48, 64, float('inf')],
    labels=False,
).astype(int)

# ---------------------------------------------------------------------------
# Building and training machine-learning models
# ---------------------------------------------------------------------------
print(putnici.columns)
# NOTE(review): `print(())` in the extracted source; presumably
# `putnici.info()` as well - confirm.
putnici.info()
print(putnici.head(10))
sns.heatmap(putnici.corr(), annot=True, cmap='RdYlGn')
plt.show()

# Target and feature matrix; 25 % of the rows are held out for testing.
y = putnici['Survived']
X = putnici.drop('Survived', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

model_1 = LogisticRegression(solver='lbfgs')  # Logistic regression
model_2 = KNeighborsClassifier(n_neighbors=3)  # K nearest neighbours
model_3 = DecisionTreeClassifier()  # Decision tree
model_4 = RandomForestClassifier(n_estimators=100)  # Random forest
model_5 = SVC(gamma='scale')  # Support vector machine

# Fit every model on the same split and report its test-set accuracy.
for naziv, model in (
        ('Logisticka regresija: ', model_1),
        ('K najblizih suseda: ', model_2),
        ('Stablo odlucivanja: ', model_3),
        ('Slucajna suma: ', model_4),
        ('Metoda potpornih vektora: ', model_5),
):
    model.fit(X_train, y_train)
    predvidjanje = model.predict(X_test)
    # accuracy_score takes (y_true, y_pred).
    print(naziv, accuracy_score(y_test, predvidjanje))
................

In order to avoid copyright disputes, this page is only a partial summary.

Google Online Preview   Download

To fulfill the demand for quickly locating and searching documents.

It is an intelligent file search solution for home and business.

Literature Lottery

Related download