Automatika - Početna



"""Titanic: exploratory analysis, feature engineering and model training.

The script loads ``titanik.csv``, prints summary statistics, draws a series
of exploratory plots, derives new features, encodes everything as integers
and finally compares the accuracy of five scikit-learn classifiers.

Features of the data set
------------------------
Feature      Explanation                                   Data type
PassengerId  Ordinal number in the csv list                Numeric, discrete
Survived     Whether the person survived: 0 = No, 1 = Yes  Categorical (2), numeric (integer)
Pclass       Passenger class: 1 = first class,             Ordinal (like categorical, except
             2 = second class, 3 = third class             that the values can be sorted)
Name         Name of the person                            Alphanumeric, string
Sex          Sex of the person: male, female               Categorical (2), string
Age          Age in years                                  Numeric, continuous (babies under one
                                                           year are described by a decimal number)
SibSp        Total number of spouses and siblings aboard   Numeric, discrete
Parch        Total number of parents and children aboard   Numeric, discrete
Ticket       Ticket number                                 Alphanumeric, string
Fare         Ticket price                                  Numeric, continuous
Cabin        Cabin number                                  Alphanumeric, string
Embarked     Port of embarkation: C = Cherbourg,           Categorical (3 values), string
             Q = Queenstown, S = Southampton
"""

# Data analysis
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier  # Random forest
from sklearn.linear_model import LogisticRegression  # Logistic regression
from sklearn.metrics import accuracy_score  # measuring model accuracy
from sklearn.model_selection import train_test_split  # train / test split
from sklearn.neighbors import KNeighborsClassifier  # K nearest neighbours
from sklearn.svm import SVC  # Support vector machine
from sklearn.tree import DecisionTreeClassifier  # Decision tree

putnici = pd.read_csv('titanik.csv')  # load the data

# ---------------------------------------------------------------------------
# 3.1 Preliminary data overview
# ---------------------------------------------------------------------------
print(putnici.columns)
print(putnici.head())

# Widen the console output so that all columns are printed side by side.
pd.set_option('display.width', 300)
pd.set_option('display.max_columns', 15)

print(putnici.head())
print(putnici.head(15))
print(putnici.tail())
print(putnici.sample(10))
# NOTE(review): this prints an empty tuple; presumably a call such as
# putnici.info() was intended here -- confirm against the original material.
print(())
print(putnici.isnull().sum())
print(putnici.describe())  # numeric columns
print(putnici.describe(include=['O']))  # string (object) columns
print(putnici['Survived'].value_counts())
print(putnici['Parch'].value_counts())
print(putnici[putnici['Age'] > 65])

# Visualization
# x / y are passed by keyword throughout: recent seaborn releases no longer
# accept them positionally, while the keyword form works on every version.
sns.countplot(x='Pclass', data=putnici)
sns.countplot(x='Embarked', data=putnici)
putnici.hist('Age')
putnici.hist('Age', bins=range(0, 81, 1))
putnici.hist('Fare', bins=range(0, 515, 5))
plt.show()

# Mean of Survived (i.e. survival rate) per value of each feature.
print(putnici[['Sex', 'Survived']].groupby(['Sex']).mean())
print(putnici[['Pclass', 'Survived']].groupby(['Pclass']).mean())
print(putnici[['Embarked', 'Survived']].groupby(['Embarked']).mean())
print(putnici[['SibSp', 'Survived']].groupby(['SibSp']).mean())
print(
    putnici[['SibSp', 'Survived']]
    .groupby(['SibSp'])
    .mean()
    .sort_values(by='Survived', ascending=False)
)
print(
    putnici[['Parch', 'Survived']]
    .groupby(['Parch'])
    .mean()
    .sort_values(by='Survived', ascending=False)
)

sns.barplot(x='Sex', y='Survived', data=putnici)
plt.show()

plt.subplot(221)
sns.barplot(x='Pclass', y='Survived', data=putnici)
plt.subplot(222)
sns.barplot(x='Embarked', y='Survived', data=putnici)
plt.subplot(223)
sns.barplot(x='SibSp', y='Survived', data=putnici)
plt.subplot(224)
sns.barplot(x='Parch', y='Survived', data=putnici)
plt.show()

print(pd.crosstab(putnici['Embarked'], putnici['Pclass']))

# Age distribution of survivors (red) versus non-survivors (blue).
# NOTE(review): distplot is deprecated in newer seaborn releases
# (histplot / displot are the documented replacements) -- kept unchanged here.
preziveli = putnici[putnici['Survived'] == 1]
poginuli = putnici[putnici['Survived'] == 0]
sns.distplot(preziveli['Age'].dropna().values, bins=range(0, 81, 1), color='red')
sns.distplot(
    poginuli['Age'].dropna().values,
    bins=range(0, 81, 1),
    color='blue',
    axlabel='Starost',
)
plt.show()

# ---------------------------------------------------------------------------
# 3.2 Examining relationships between features
# ---------------------------------------------------------------------------
# At this point the frame still holds string columns, so the correlation
# matrix is computed on the numeric columns only.
sns.heatmap(putnici.select_dtypes(include='number').corr(), annot=True)
plt.show()

# Both comparisons must be parenthesised: `&` binds tighter than `==`, so
# `a == 1 & (b == 'male')` would be evaluated as `a == (1 & (b == 'male'))`.
prez_mus = putnici[(putnici['Survived'] == 1) & (putnici['Sex'] == 'male')]
pog_mus = putnici[(putnici['Survived'] == 0) & (putnici['Sex'] == 'male')]
prez_zene = putnici[(putnici['Survived'] == 1) & (putnici['Sex'] == 'female')]
pog_zene = putnici[(putnici['Survived'] == 0) & (putnici['Sex'] == 'female')]

plt.subplot(121)
sns.distplot(
    prez_mus['Age'].dropna().values, bins=range(0, 81, 1), kde=False, color='red'
)
sns.distplot(
    pog_mus['Age'].dropna().values,
    bins=range(0, 81, 1),
    kde=False,
    color='blue',
    axlabel='Muskarci - Starost',
)
plt.subplot(122)
sns.distplot(
    prez_zene['Age'].dropna().values, bins=range(0, 81, 1), kde=False, color='red'
)
sns.distplot(
    pog_zene['Age'].dropna().values,
    bins=range(0, 81, 1),
    kde=False,
    color='blue',
    axlabel='Zene - Starost',
)
plt.show()

sns.violinplot(x='Pclass', y='Age', hue='Survived', data=putnici, split=True)
plt.show()

# NOTE(review): factorplot was renamed to catplot in later seaborn releases
# (catplot(kind='point') is the documented equivalent) -- kept unchanged here.
sns.factorplot(x='Pclass', y='Survived', hue='Sex', col='Embarked', data=putnici)
plt.show()

sns.barplot(x='Embarked', y='Survived', hue='Pclass', data=putnici)
plt.show()

# ---------------------------------------------------------------------------
# 4.1 Dropping useless features and creating new ones
# ---------------------------------------------------------------------------
putnici = putnici.drop(['PassengerId', 'Ticket', 'Cabin'], axis=1)
print(putnici.columns)

# The title is the text between ", " and the first "." of the name.
putnici['Title'] = (
    putnici['Name']
    .str.split(", ", expand=True)[1]
    .str.split(".", expand=True)[0]
)
print(pd.crosstab(putnici['Title'], putnici['Sex']))

putnici = putnici.drop(['Name'], axis=1)
print(putnici.columns)

# Collapse the listed titles into 'Rare' and normalise spelling variants.
putnici['Title'] = putnici['Title'].replace(
    ['Lady', 'the Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev', 'Sir',
     'Jonkheer', 'Dona'],
    'Rare',
)
putnici['Title'] = putnici['Title'].replace(['Mlle', 'Ms'], 'Miss')
putnici['Title'] = putnici['Title'].replace('Mme', 'Mrs')
print(putnici[['Title', 'Survived']].groupby(['Title']).mean())
print(putnici[['Title', 'Age']].groupby(['Title']).mean())
print(pd.crosstab(putnici['Title'], putnici['Sex']))

# Family size = siblings/spouses + parents/children + the passenger.
putnici['FamilySize'] = putnici['SibSp'] + putnici['Parch'] + 1
print(
    putnici[['FamilySize', 'Survived']]
    .groupby(['FamilySize'])
    .mean()
    .sort_values(by='Survived', ascending=False)
)

# Alone = 1 for passengers whose family size is 1, otherwise 0.
putnici['Alone'] = 0
putnici.loc[putnici['FamilySize'] == 1, 'Alone'] = 1
print(putnici[['Alone', 'Survived']].groupby(['Alone']).mean())

putnici = putnici.drop(['SibSp', 'Parch'], axis=1)
print(putnici.columns)

# ---------------------------------------------------------------------------
# 4.2 Filling in missing data and converting features to appropriate types
# ---------------------------------------------------------------------------
print(putnici[putnici['Embarked'].isnull()])
putnici['Embarked'] = putnici['Embarked'].fillna('S')

print(putnici['Age'].mean())
print(putnici[['Title', 'Age']].groupby(['Title']).mean())

# Missing ages are filled with a fixed value per title.
# NOTE(review): the constants presumably come from the per-title mean ages
# printed just above -- confirm.
putnici.loc[(putnici['Age'].isnull()) & (putnici['Title'] == 'Master'), 'Age'] = 5
putnici.loc[(putnici['Age'].isnull()) & (putnici['Title'] == 'Miss'), 'Age'] = 22
putnici.loc[(putnici['Age'].isnull()) & (putnici['Title'] == 'Mr'), 'Age'] = 32
putnici.loc[(putnici['Age'].isnull()) & (putnici['Title'] == 'Mrs'), 'Age'] = 36
putnici.loc[(putnici['Age'].isnull()) & (putnici['Title'] == 'Rare'), 'Age'] = 46

# Encode the categorical string features as integers.
putnici['Sex'] = putnici['Sex'].map({'male': 0, 'female': 1})
putnici['Embarked'] = putnici['Embarked'].map({"S": 0, "C": 1, "Q": 2})
putnici['Title'] = putnici['Title'].map(
    {'Mr': 1, 'Miss': 2, 'Mrs': 3, 'Master': 4, 'Rare': 5}
)
print(putnici.head(25))

# Replace Fare by a bin index 0..3.
# NOTE(review): the thresholds presumably are the quartile edges printed by
# the qcut call below -- confirm.
print(pd.qcut(putnici['Fare'], 4))
putnici.loc[putnici['Fare'] <= 7.91, 'Fare'] = 0
putnici.loc[(putnici['Fare'] > 7.91) & (putnici['Fare'] <= 14.454), 'Fare'] = 1
putnici.loc[(putnici['Fare'] > 14.454) & (putnici['Fare'] <= 31), 'Fare'] = 2
putnici.loc[putnici['Fare'] > 31, 'Fare'] = 3
putnici['Fare'] = putnici['Fare'].astype(int)

# Replace Age by a bin index 0..4 using 16-year wide intervals.
print(putnici[putnici['Age'] == putnici['Age'].max()])
putnici.loc[putnici['Age'] <= 16, 'Age'] = 0
putnici.loc[(putnici['Age'] > 16) & (putnici['Age'] <= 32), 'Age'] = 1
putnici.loc[(putnici['Age'] > 32) & (putnici['Age'] <= 48), 'Age'] = 2
putnici.loc[(putnici['Age'] > 48) & (putnici['Age'] <= 64), 'Age'] = 3
putnici.loc[putnici['Age'] > 64, 'Age'] = 4
putnici['Age'] = putnici['Age'].astype(int)

# ---------------------------------------------------------------------------
# Building and training machine learning models
# ---------------------------------------------------------------------------
print(putnici.columns)
# NOTE(review): prints an empty tuple, as in section 3.1 -- confirm intent.
print(())
print(putnici.head(10))
sns.heatmap(putnici.corr(), annot=True, cmap='RdYlGn')
plt.show()

y = putnici['Survived']
X = putnici.drop('Survived', axis=1)
# No random_state is given, so the split (and the scores) vary between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# accuracy_score is called as (y_true, y_pred), the documented order.

# Logistic regression
model_1 = LogisticRegression(solver='lbfgs')
model_1.fit(X_train, y_train)
predvidjanje = model_1.predict(X_test)
print('Logisticka regresija: ', accuracy_score(y_test, predvidjanje))

# K nearest neighbours
model_2 = KNeighborsClassifier(n_neighbors=3)
model_2.fit(X_train, y_train)
predvidjanje = model_2.predict(X_test)
print('K najblizih suseda: ', accuracy_score(y_test, predvidjanje))

# Decision tree
model_3 = DecisionTreeClassifier()
model_3.fit(X_train, y_train)
predvidjanje = model_3.predict(X_test)
print('Stablo odlucivanja: ', accuracy_score(y_test, predvidjanje))

# Random forest
model_4 = RandomForestClassifier(n_estimators=100)
model_4.fit(X_train, y_train)
predvidjanje = model_4.predict(X_test)
print('Slucajna suma: ', accuracy_score(y_test, predvidjanje))

# Support vector machine
model_5 = SVC(gamma='scale')
model_5.fit(X_train, y_train)
predvidjanje = model_5.predict(X_test)
print('Metoda potpornih vektora: ', accuracy_score(y_test, predvidjanje))

# NOTE: the extracted listing is cut off at this point.
................

In order to avoid copyright disputes, this page is only a partial summary.

Google Online Preview   Download

To fulfill the demand for quickly locating and searching documents.

It is an intelligent file search solution for home and business.

Literature Lottery

Related download
Related searches