-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHF_kmeans.py
51 lines (33 loc) · 962 Bytes
/
HF_kmeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 10 15:15:52 2024
@author: lavig
"""
import os

# Cap the number of OpenMP threads.  The variable is set *before* numpy and
# scikit-learn are imported: native thread-pool runtimes generally read it
# when they are first loaded, so assigning it after those imports may have
# no effect.
os.environ["OMP_NUM_THREADS"] = '4'

from sklearn.cluster import KMeans  # noqa: E402
import pandas as pd  # noqa: E402
import numpy as np  # noqa: E402
import matplotlib.pyplot as plt  # noqa: E402

# Load the dataset (path is relative to the current working directory) and
# print a quick preview: the first rows and the column names.
hf = pd.read_csv("heart_failure_clinical_records_dataset.csv")
print(hf.head())
print(hf.columns)
# Coefficient of variation (sample standard deviation expressed as a
# percentage of the mean) for the two columns selected below.
ss_cp = hf[["serum_sodium", "creatinine_phosphokinase"]]


def cv(x):
    """Return the coefficient of variation of *x* as a percentage.

    The sample standard deviation (``ddof=1``) is divided by the mean and
    multiplied by 100.
    """
    return np.std(x, ddof=1) / np.mean(x) * 100


# Apply column-wise to the two selected columns only, rather than to every
# column of ``hf``.
print(ss_cp.apply(cv))
# Scatter plot of the two features.  Columns are selected by name instead of
# by position so the plot does not depend on the column order of the CSV.
x = hf["creatinine_phosphokinase"]
y = hf["serum_sodium"]

plt.scatter(x, y, alpha=0.50)
plt.title("Creatinine Phosphokinase vs. Serum Sodium")
# The x axis shows creatinine phosphokinase, so the label names that column.
plt.xlabel("Creatinine Phosphokinase")
plt.ylabel("Serum Sodium")
plt.show()
# Partition the records into four clusters with k-means.
# ``random_state`` fixes the centroid initialisation so that the cluster ids
# (and therefore the colours in the plot below) are the same on every run.
# NOTE(review): every column of ``hf`` is used as a feature, without scaling,
# so columns with large magnitudes will dominate the distance computation.
# Confirm whether clustering on selected and/or standardised columns was
# intended.
model = KMeans(n_clusters=4, random_state=0)

# fit_predict fits the model and returns the cluster index of each row in a
# single call.
labels = model.fit_predict(hf)
print(labels)

# Same two features as the earlier scatter plot, now coloured by cluster.
x = hf["creatinine_phosphokinase"]
y = hf["serum_sodium"]
plt.scatter(x, y, c=labels, alpha=0.5)
plt.title("K-Means Clusters (k=4)")
plt.xlabel("Creatinine Phosphokinase")
plt.ylabel("Serum Sodium")
plt.show()