corrections

tiagoantao · tiagoantao · commit fdfaebcdfeca · 2022-11-20T12:06:58.000-05:00
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,3 @@
 .ipynb_checkpoints
 .Rhistory
-Chapter01/sequence.index
-Chapter11/data
+__pycache__
diff --git a/Chapter07/Alignment.py b/Chapter07/Alignment.py
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: light
 #       format_version: '1.5'
-#       jupytext_version: 1.13.6
+#       jupytext_version: 1.14.0
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python
diff --git a/Chapter10/Clustering.py b/Chapter10/Clustering.py
@@ -116,9 +116,11 @@ def plot_kmeans_pca(trans, kmeans):
 plot_kmeans_pca(trans, kmeans4)
 
 pca_predict = my_pca.transform([predict_case])
-kmeans.predict(pca_predict)
+kmeans4.predict(pca_predict)
 
 last_train = ind_order[-2]
 last_train, ind_pop[last_train]
 
-kmeans.predict(trans)[0]
+kmeans4.predict(trans)[0]
+
+
diff --git a/Chapter10/Decision_Tree.py b/Chapter10/Decision_Tree.py
@@ -50,24 +50,27 @@
 samples
 
 # + jupyter={"outputs_hidden": false}
-trainning_input = samples.iloc[:,:-1]
+training_input = samples.iloc[:,:-1]
 target = samples.iloc[:,-1].apply(lambda x: 0 if x == 2 else 1)
 
 # + jupyter={"outputs_hidden": false}
 clf = tree.DecisionTreeClassifier(max_depth=3)
 
 # + jupyter={"outputs_hidden": false}
-clf.fit(trainning_input, target)
+clf.fit(training_input, target)
 
 # + jupyter={"outputs_hidden": false}
 importances = pd.Series(
     clf.feature_importances_ * 100,
-    index=trainning_input.columns).sort_values(ascending=False)
+    index=training_input.columns).sort_values(ascending=False)
 importances
 
+# + jupyter={"outputs_hidden": false}
+100 * clf.score(training_input, target)
+
 # + jupyter={"outputs_hidden": false}
 fig, ax = plt.subplots(1, dpi=300)
-tree.plot_tree(clf,ax=ax, feature_names=trainning_input.columns, class_names=['Benign', 'Malignant'])
+tree.plot_tree(clf,ax=ax, feature_names=training_input.columns, class_names=['Benign', 'Malignant'])
 # -
 
 
diff --git a/Chapter10/Random_Forest.py b/Chapter10/Random_Forest.py
@@ -55,7 +55,7 @@
 target = samples.iloc[:,-1]
 
 # + jupyter={"outputs_hidden": false}
-clf = RandomForestClassifier(max_depth=3)
+clf = RandomForestClassifier(max_depth=3, n_estimators=200)
 
 # + jupyter={"outputs_hidden": false}
 clf.fit(trainning_input, target)
@@ -67,7 +67,9 @@
 importances
 # -
 
-clf.score(trainning_input, target)
+100 * clf.score(trainning_input, target)
+
+
 
 for test_size in [0.01, 0.1, 0.2, 0.5, 0.8, 0.9, 0.99]:
     X_train, X_test, y_train, y_test = train_test_split(
@@ -77,3 +79,5 @@
     score = tclf.score(X_test, y_test)
     print(f'{1 - test_size:.1%} {score:.2%}')
 # Random number generator
+
+
diff --git a/Datasets.py b/Datasets.py
@@ -1,3 +1,4 @@
+
 # # Datasets for the book
 #
 # Here we provide links to the datasets used in the book.
@@ -13,4 +14,9 @@
 # http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/20130502.phase3.sequence.index
 #
 
-
+# # PDB
+#
+# 
+# ## Parsing mmCIF files with Biopython
+#
+# [1TUP.cif](http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=cif&compression=NO&structureId=1TUP)
diff --git a/docker/Chapter01/Dockerfile b/docker/Chapter01/Dockerfile
@@ -1,7 +1,6 @@
 FROM tiagoantao/bio3
 MAINTAINER Tiago Antao <tiago@tiago.org>
-RUN conda create -n bioinformatics-r --clone bionformatics_base
+RUN conda create -n bioinformatics_r --clone bionformatics_base
 
-RUN conda init bash
-RUN conda activate bioinformatics-r; conda install r-ggplot2=3.3.5 r-lazyeval=0.2.2 r-gridextra=2.3 rpy2
-CMD conda init bash; conda activate bioinformatics-r; jupyter-lab --ip=0.0.0.0 --no-browser --allow-root --port=9875 --NotebookApp.token='' --NotebookApp.password=''
+RUN conda install -n bioinformatics_r r-ggplot2=3.3.5 r-lazyeval=0.2.2 r-gridextra=2.3 rpy2
+CMD conda run --no-capture-output -n bioinformatics_r jupyter-lab --ip=0.0.0.0 --no-browser --allow-root --port=9875 --NotebookApp.token='' --NotebookApp.password=''
diff --git a/docker/main/Dockerfile b/docker/main/Dockerfile
@@ -10,12 +10,13 @@ RUN git clone https://github.com/PacktPublishing/Bioinformatics-with-Python-Cook
 #RUN conda upgrade -n base conda
 RUN conda config --add channels conda-forge
 RUN conda config --add channels bioconda
-RUN conda create -n bionformatics_base --file /Bioinformatics-with-Python-Cookbook-third-Edition/Chapter01/bioinformatics_base.txt
+RUN conda create -n bioinformatics_base --file /Bioinformatics-with-Python-Cookbook-third-Edition/Chapter01/bioinformatics_base.txt
+RUN pip install pyarrow==8.0.0
 RUN conda init bash
 
 EXPOSE 9875
 
 WORKDIR /Bioinformatics-with-Python-Cookbook-third-Edition
 
 RUN echo setterm -foreground magenta >> /etc/bash.bashrc
-CMD conda activate bioinformatics_base; jupyter-lab --ip=0.0.0.0 --no-browser --allow-root --port=9875 --NotebookApp.token='' --NotebookApp.password=''
+CMD conda run --no-capture-output -n bioinformatics_base jupyter-lab --ip=0.0.0.0 --no-browser --allow-root --port=9875 --NotebookApp.token='' --NotebookApp.password=''

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+`
`1`	`2`	`# # Datasets for the book`
`2`	`3`	`#`
`3`	`4`	`# Here we provide links to the datasets used in the book.`
`@@ -13,4 +14,9 @@`
`13`	`14`	`# http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/20130502.phase3.sequence.index`
`14`	`15`	`#`
`15`	`16`
`16`		`-`
	`17`	`+# # PDB`
	`18`	`+#`
	`19`	`+#`
	`20`	`+# ## Parsing mmCIF files with Biopython`
	`21`	`+#`
	`22`	`+# [1TUP.cif](http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=cif&compression=NO&structureId=1TUP)`