-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathVolcanoPlot.ipf
1965 lines (1749 loc) · 64.8 KB
/
VolcanoPlot.ipf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#pragma TextEncoding = "UTF-8"
#pragma rtGlobals=3 // Use modern global access method and strict wave access
#pragma DefaultTab={3,20,4} // Set default tab width in Igor Pro 9 and later
#include <WaveSelectorWidget>
#include <PopupWaveSelector>
// the user can start with:
// a single proteinGroups.txt file from MaxQuant (LFQ)
// multiple proteinGroups.txt files from MaxQuant (LFQ) each in their own folder in a single folder
// some waves already loaded into Igor
////////////////////////////////////////////////////////////////////////
// Menu items
////////////////////////////////////////////////////////////////////////
Menu "Proteomics"
"Load MaxQuant Data...", /Q, LoadMaxQuantData()
"Load Multiple MaxQuant...", /Q, LoadMultiMaxQuantData()
SubMenu "Tools"
"Volcano Plot...", /Q, VolcanoIO_Panel() // use this option if waves already loaded
// "PCA Only...", /Q, MakePCAWaveSelectorPanel() // hide this option
"Label Top 10", /Q, LabelTopTenWorkflow()
"Save Layout", /Q, SaveTheLayout()
"Save Table", /Q, SaveTheTable()
"Save Data", /Q, SaveUnsortedDataMeans()
// "Save for ProteoRE", /Q, SaveForProteore()
end
SubMenu "Subcellular analysis"
"Make List to Retrieve Uniprot Data", /Q, UniprotTable()
"Load and Match UniProt Data...", /Q, UniprotWrapper()
"Filter for GO Term(s)", /Q, GOTerm_Panel()
end
End
////////////////////////////////////////////////////////////////////////
// Master functions and wrappers
////////////////////////////////////////////////////////////////////////
// user selects a proteinGroups.txt file to analyse
// user then specifies what is compared
Function LoadMaxQuantData()
if(LoadMaxQuantFile() == 0)
VolcanoIO_Panel()
endif
End
// user selects a directory with sub-directories containing a proteinGroups.txt file
// to analyse. User then specifies what is compared for each of the files
Function LoadMultiMaxQuantData()
if(LoadMaxQuantFiles() == 0)
MakeExpSelectorPanel()
endif
End
// this is the workflow to make a Volcano Plot and associated plots
Function VolcanoWorkflowWrapper(VARIABLE opt)
if(opt == 0)
PrepareForVolcano()
else
ConsolidateData()
MergeTheData()
endif
MakeVolcano()
MakeColorTableWave()
MakeVPlot()
TableInterestingValues()
AddSignificantHitsToVolcano(0)
MakeMeanComparison()
FromVolcanoToPCA()
MakeTheLayout()
End
// this wrapper allows the user to select proteins for display using GO terms
Function UniprotWrapper()
if(LoadUniprot() == 0)
MatchResultsAndUniProt()
endif
if(FilterGOTerms() == 0)
// build listbox that allows multiple selections
GOTerm_Panel()
// this panel triggers FilteredVolcanoWorkflowWrapper()
endif
// use selections to filter gene-names that feature the GO terms
// plot volcano using only those gene_names
End
Function FilteredVolcanoWorkflowWrapper()
FilterForGOTerms()
MakeFilteredVPlot()
AddSignificantHitsToVolcano(1)
AddGOTermsToVolcano()
End
Function LabelTopTenWorkflow()
LabelTopXProts(10)
End
////////////////////////////////////////////////////////////////////////
// Main functions
////////////////////////////////////////////////////////////////////////
Function LoadMaxQuantFile()
NewDataFolder/O/S root:data
LoadWave/A/D/J/K=0/L={0,0,0,0,0}/V={"\t","",0,0}/W/O/Q ""
if (strlen(S_Path) == 0) // user pressed cancel
return -1
endif
NewPath/O/Q DiskFolder, S_Path
WAVE/Z/T Gene_names, Protein_names, Protein_IDs, Fasta_headers
WAVE/Z/T Potential_contaminant, Only_identified_by_site, ReverseW
Duplicate/O Gene_names, root:SHORTNAME
Duplicate/O Protein_names, root:NAME
Duplicate/O Protein_IDs, root:PID
Wave/T SHORTNAME = $("root:SHORTNAME")
Wave/T NAME = $("root:NAME")
String wList = WaveList("LFQ_Intensity*",";","")
Variable nWaves = ItemsInList(wList)
String newList = "root:SHORTNAME;root:NAME;root:PID;"
String wName, newName, fasta
Variable start, stop
Variable i,j
for(i = 0; i < nWaves; i += 1)
wName = StringFromList(i,wList)
newName = "root:" + ReplaceString("LFQ_Intensity_",wName,"")
Duplicate/O $wName, $newName
newList += newName + ";"
endfor
// repopulate blanks in Gene_names and Protein_names column using Fasta_headers
Variable nRow = numpnts(Gene_Names)
for(i = 0; i < nRow; i += 1)
if(strlen(SHORTNAME[i]) > 0 && strlen(NAME[i]) > 0)
continue
endif
fasta = StringFromList(0,Fasta_headers[i])
if(strlen(SHORTNAME[i]) == 0)
start = strsearch(fasta," GN=",0) + 4
stop = strsearch(fasta," ",start) - 1
if(stop < 0)
stop = strlen(fasta) - 1
endif
SHORTNAME[i] = fasta[start,stop]
endif
if(strlen(NAME[i]) == 0)
start = strsearch(fasta," ",0) + 1
stop = strsearch(fasta," OS=",0) - 1
NAME[i] = fasta[start,stop]
endif
endfor
// we need to remove data where there is
// a "+" in Potential_contaminant, Only_identified_by_site, or ReverseW
// it is possible though that if there are no + in one of these columns, the wave will be numeric
nWaves = ItemsInList(newList)
for(i = nRow - 1; i >= 0; i -= 1)
if(WaveType(Potential_contaminant,1) == 2 && cmpstr(Potential_contaminant[i],"+") == 0)
for(j = 0; j < nWaves; j += 1)
DeletePoints i,1,$(StringFromList(j,newList))
endfor
continue
endif
if(WaveType(Only_identified_by_site,1) == 2 && cmpstr(Only_identified_by_site[i],"+") == 0)
for(j = 0; j < nWaves; j += 1)
DeletePoints i,1,$(StringFromList(j,newList))
endfor
continue
endif
if(WaveType(ReverseW,1) == 2 && cmpstr(ReverseW[i],"+") == 0)
for(j = 0; j < nWaves; j += 1)
DeletePoints i,1,$(StringFromList(j,newList))
endfor
continue
endif
endfor
SetDataFolder root:
return 0
End
Function LoadMaxQuantFiles()
// find folder containing the subfolders
NewPath/O/Q/M="Please find the folder containing subfolders" diskFolder
if (V_flag != 0)
DoAlert 0, "Disk folder error"
return -1
endif
PathInfo diskFolder
String diskFolderPath0 = S_Path
// set up data folder and get ready to load
NewDataFolder/O/S root:data
// exp in this case means a unique proteomics experiment
String expDirList = IndexedDir(diskFolder,-1,0)
Variable nExp = ItemsInList(expDirList)
Make/O/N=(nExp)/T nameWave0
String dfName0, diskFolderPath1, dataFolderPath
String wName, newName, wList, newList, condList, fasta
Variable nRow, nWaves, start, stop
Variable i,j,k
for (i = 0; i < nExp; i += 1)
nameWave0[i] = StringFromList(i,expDirList)
dfName0 = "exp_" + num2str(i)
NewDataFolder/O/S $dfName0
diskFolderPath1 = diskFolderPath0 + nameWave0[i] + ":"
NewPath/O/Q expDiskFolder1, diskFolderPath1
NewDataFolder/O/S $("data")
LoadWave/A/D/J/K=0/L={0,0,0,0,0}/V={"\t","",0,0}/W/O/P=expDiskFolder1/Q "proteinGroups.txt"
if(V_flag < 4)
DoAlert 0, "No proteinGroups file found for " + nameWave0[i]
return -1
endif
dataFolderPath = "root:data:" + dfName0 + ":"
// for historical reasons these two waves need to be renamed
// place them in the exp folder
WAVE/Z/T Gene_names, Protein_names, Protein_IDs, Fasta_headers
WAVE/Z/T Potential_contaminant, Only_identified_by_site, ReverseW
Duplicate/O Gene_names, $(dataFolderPath + "SHORTNAME")
Duplicate/O Protein_names, $(dataFolderPath + "NAME")
Duplicate/O Protein_IDs, $(dataFolderPath + "PID")
Wave/T SHORTNAME = $(dataFolderPath + "SHORTNAME")
Wave/T NAME = $(dataFolderPath + "NAME")
// what LFQ values do we have - these are conditions and replicates
wList = WaveList("LFQ_Intensity*",";","")
condList = ReplaceString("LFQ_Intensity_",wList,"")
nWaves = ItemsInList(wList)
// save names of conditions from this proteinGroups file
GenerateConditionGroupWaves(condList,i)
newList = dataFolderPath + "SHORTNAME;" + dataFolderPath + "NAME;" + dataFolderPath + "PID;"
for(j = 0; j < nWaves; j += 1)
wName = StringFromList(j,wList)
newName = dataFolderPath + StringFromList(j,condList)
Duplicate/O $wName, $newName
newList += newName + ";"
endfor
// repopulate blanks in Gene_names and Protein_names column using Fasta_headers
nRow = numpnts(Gene_Names)
for(j = 0; j < nRow; j += 1)
if(strlen(SHORTNAME[j]) > 0 && strlen(NAME[j]) > 0)
continue
endif
fasta = StringFromList(0,Fasta_headers[j])
if(strlen(SHORTNAME[j]) == 0)
start = strsearch(fasta," GN=",0) + 4
stop = strsearch(fasta," ",start) - 1
SHORTNAME[j] = fasta[start,stop]
endif
if(strlen(NAME[j]) == 0)
start = strsearch(fasta," ",0) + 1
stop = strsearch(fasta," OS=",0) - 1
NAME[j] = fasta[start,stop]
endif
endfor
// remove data where there is
// a "+" in Potential_contaminant, Only_identified_by_site, or ReverseW
nWaves = ItemsInList(newList)
for(j = nRow - 1; j >= 0; j -= 1)
if(cmpstr(Potential_contaminant[j],"+") == 0 || cmpstr(Only_identified_by_site[j],"+") == 0 || cmpstr(ReverseW[j],"+") == 0)
for(k = 0; k < nWaves; k += 1)
DeletePoints j,1,$(StringFromList(k,newList))
endfor
endif
endfor
SetDataFolder root:data:
endfor
SetDataFolder root:
return 0
End
STATIC Function GenerateConditionGroupWaves(STRING condList, VARIABLE ii)
Make/O/N=(ItemsInList(condList))/T $("root:data:cond_" + num2str(ii)) = StringFromList(p,condList)
Wave/T/Z tw = $("root:data:cond_" + num2str(ii))
Duplicate/O/FREE/T tw, temptw
temptw[] = tw[p][0][0][0][0,strlen(tw[p])-2] // this removes the last character - more than 9 replicates = problem
FindDuplicates/FREE/RT=uCond temptw
Duplicate/O/T uCond $("root:data:condMstr_" + num2str(ii))
End
Function PrepareForVolcano()
WAVE/Z volcanoParamWave
WAVE/Z/T volcanoPrefixWave
String prefix1 = volcanoPrefixWave[0] // string prefix that will select all waves of condition 1
String prefix2 = volcanoPrefixWave[1] // string prefix that will select all waves of condition 2
Variable baseVal = volcanoParamWave[0] // proteins that are absent have this value
Variable pairOpt = volcanoParamWave[1] // 1 is paired, 0 is not
Variable seedOpt = volcanoParamWave[2] // 1 is use seed, 0 is not (truly random)
Variable foldChange = log(abs(volcanoParamWave[3])) / log(2) // log2 value for threshold i.e. 1 is 2-fold change
Variable seed1 = volcanoParamWave[4] // 1-1000 value for seed
Variable seed2 = volcanoParamWave[5] // 1-1000 value for seed
Variable meanOpt = volcanoParamWave[6] // 1 is ratios v ratios, 0 is mean v mean
String wList1 = WaveList(prefix1,";","")
String wList2 = WaveList(prefix2,";","")
if(ItemsInList(wList1) == 0 || ItemsInList(wList2) == 0)
DoAlert 0, "Missing data"
return -1
else
Print "Test runs:", ItemsInList(wList1), "\rControl runs:", ItemsInList(wList2)
endif
// make sure the lists are in order
wList1 = SortList(wList1)
wList2 = SortList(wList2)
// Possibly we should use a more sophisticated way to check the groups match?
// using this method x_1, x_2, x_3 will run with y_1, y_4, y_5
Concatenate/O/NP=1 wList1, allCond1
Concatenate/O/NP=1 wList2, allCond2
// This is a comparison between two conditions in one proteinGroups file
// It is possible that the conditions we are comparing are part of a larger dataset
// If a protein has was not detected in either condition, then it should be removed
WAVE/T/Z SHORTNAME,NAME,PID
RemoveBlankLFQs(SHORTNAME,NAME,PID,allCond1,allCond2)
// deal with baseVal
TransformImputeBaseVal(allCond1)
TransformImputeBaseVal(allCond2)
End
// This function drives the whole program
Function MakeVolcano()
// as we come in we have two matrices for the conditions - data is Log2 transformed
WAVE/Z volcanoParamWave, allCond1, allCond2
Variable pairOpt = volcanoParamWave[1] // 1 is paired, 0 is not
Variable foldChange = log(abs(volcanoParamWave[3])) / log(2) // log2 value for threshold i.e. 1 is 2-fold change
Variable meanOpt = volcanoParamWave[6] // 1 is ratios v ratios, 0 is mean v mean
Variable perseusOpt = volcanoParamWave[7] // 1 is perseus style (difference), 0 is previous method
// at this point we have the consolidated merged data it is log2 transformed (following imputation)
Variable pVar
Variable nProt = DimSize(allCond1,0)
Make/O/N=(nProt) allTWave, colorWave=0
Variable i
// do T-tests
for(i = 0; i < nProt; i += 1)
// we extract the row per protein. NaNs are present from missing values and must be removed
MatrixOp/O/FREE w0 = row(allCond1,i) ^ t
WaveTransform zapnans w0
MatrixOp/O/FREE w1 = row(allCond2,i) ^ t
WaveTransform zapnans w1
if(pairOpt == 0)
// we assume equal variance
StatsTTest/TAIL=4/DFM=2/Q/Z w0,w1
else
StatsTTest/Q/Z/PAIR w0,w1
endif
WAVE/Z W_StatsTTest
if(V_flag == 0)
pVar = W_StatsTTest[%P] // p-value
else
pVar = 1
endif
allTWave[i] = pVar
endfor
// check that replicates are equal, if Ratios v Ratios is selected this is a requirement. Reset Opt if not
if(meanOpt == 1 && (DimSize(allCond1,1) != DimSize(allCond2,1)))
Print "Unequal replicates in test and control. Ratios v Ratios not possible."
meanOpt = 0
volcanoParamWave[6] = meanOpt
endif
// here we diverge if we are doing perseus style or not
if(perseusOpt == 0)
// make mean waves - these need transformation back
allCond1[][] = 2^(allCond1[p][q])
allCond2[][] = 2^(allCond2[p][q])
// because of potential NaNs we need a few extra steps
Duplicate/O/FREE allCond1, sumMat
sumMat[][] = (numtype(allCond1) == 2) ? 0 : 1
MatrixOp/O meanCond1 = sumrows(replaceNaNs(allCond1,0)) / sumrows(sumMat)
Duplicate/O/FREE allCond2, sumMat
sumMat[][] = (numtype(allCond2) == 2) ? 0 : 1
MatrixOp/O meanCond2 = sumrows(replaceNaNs(allCond2,0)) / sumrows(sumMat)
// if we are doing mean vs mean
if(meanOpt == 0)
// ratio wave
MatrixOp/O ratioWave = meanCond1 / meanCond2
else
// otherwise we will compare ratio vs ratio
MatrixOp/O/FREE ratioMat = allCond1 / allCond2
Duplicate/O/FREE ratioMat, sumMat
sumMat[][] = (numtype(ratioMat) == 2) ? 0 : 1
// ratio wave
MatrixOp/O ratioWave = sumrows(replaceNaNs(ratioMat,0)) / sumrows(sumMat)
// note that I tried an alternative way to calc meanCond1/2 and it was no better
endif
// ratios need converting to Log2 for volcanoPlot
Duplicate/O ratioWave,ratioWave_log2
ratioWave_log2[] = log(abs(ratioWave[p])) / log(2)
else
// because of potential NaNs we need a few extra steps
Duplicate/O/FREE allCond1, sumMat
sumMat[][] = (numtype(allCond1) == 2) ? 0 : 1
MatrixOp/O meanCond1 = sumrows(replaceNaNs(allCond1,0)) / sumrows(sumMat)
Duplicate/O/FREE allCond2, sumMat
sumMat[][] = (numtype(allCond2) == 2) ? 0 : 1
MatrixOp/O meanCond2 = sumrows(replaceNaNs(allCond2,0)) / sumrows(sumMat)
// we will leave these meanCond* waves in log2 space
// if we are doing mean vs mean
if(meanOpt == 0)
// DIFFERENCE wave - persesus style
MatrixOp/O ratioWave = meanCond1 - meanCond2
else
// otherwise we will compare ratio vs ratio
MatrixOp/O/FREE ratioMat = allCond1 - allCond2
Duplicate/O/FREE ratioMat, sumMat
sumMat[][] = (numtype(ratioMat) == 2) ? 0 : 1
// ratio wave
MatrixOp/O ratioWave = sumrows(replaceNaNs(ratioMat,0)) / sumrows(sumMat)
// note that I tried an alternative way to calc meanCond1/2 and it was no better
endif
// differences are in log2 so we need to copy them to the appropriate wave for volcanoPlot
Duplicate/O ratioWave,ratioWave_log2
// and then transform ratioWave back to linear
ratioWave[] = 2^(ratioWave_log2[p])
// transformation back to log2 for consistency with non-perseus approach
allCond1[][] = 2^(allCond1[p][q])
allCond2[][] = 2^(allCond2[p][q])
endif
// assign colors
colorWave[] = (ratioWave_log2[p] >= foldChange && abs(allTwave[p] <= 0.05)) ? 3 : colorWave[p]
colorWave[] = (ratioWave_log2[p] <= -foldChange && abs(allTwave[p] <= 0.05)) ? 2 : colorWave[p]
colorWave[] = (ratioWave_log2[p] >= foldChange && abs(allTwave[p] > 0.05)) ? 1 : colorWave[p]
// at the exit allCond1 and allCond2 are linear (for plotting on scatter plot)
End
/// @param m0 2D wave containing data to be imputed
STATIC Function TransformImputeBaseVal(m0)
WAVE m0
WAVE/Z volcanoParamWave = root:volcanoParamWave
// work out which wave we are doing condition 1 or condition 2
String wName = NameOfWave(m0)
Variable nn = str2num(wName[strlen(wName) - 1])
Variable baseVal = volcanoParamWave[0]
Variable seed = volcanoParamWave[3 + nn] / 1000 // will be row 4 for condition 1 and 5 for 2
// values from Perseus - working on each replicate not on whole matrix
Variable width = 0.3
Variable downShift = 1.8
if(volcanoParamWave[2] != 0)
SetRandomSeed seed
endif
// make a copy of the matrix
Duplicate/O/FREE m0,m1
// delete base value
if(numtype(baseVal) == 2)
// do nothing
else
m1[][] = (m1[p][q] == baseVal) ? NaN : m1[p][q]
endif
// log2 transform
m1[][] = log(m1[p][q]) / log(2)
Variable nCols = dimsize(m1,1)
Variable meanVar,sdVar
Variable i
for(i = 0; i < nCols; i += 1)
MatrixOp/O/FREE tempW = col(m1,i)
WaveTransform zapnans tempW // valid values
sdVar = sqrt(variance(tempW))
meanVar = mean(tempW) - (sdVar * downShift)
sdVar = sdVar * width
// add noise to base values in m0 and log transform real values col by col
if(numtype(baseVal) == 2)
// deal with NaN
m0[][i] = (numtype(m0[p][i]) == 2) ? meanVar + gnoise(sdVar) : m1[p][i]
else
m0[][i] = (m0[p][i] == baseVal) ? meanVar + gnoise(sdVar) : m1[p][i]
endif
endfor
// at the end of this function m0 is a log2 tranform of the input
End
Function MakeVPlot()
WAVE/Z allTWave,ratioWave_log2,colorWave,colorTableWave,volcanoParamWave
WAVE/Z/T volcanoLabelWave
KillWindow/Z volcanoPlot
Display/N=volcanoPlot/W=(35,45,430,734) allTWave vs ratioWave_log2
SetAxis/W=volcanoPlot/A/R/N=1 left
ModifyGraph/W=volcanoPlot log(left)=1
Variable maxVar = max(wavemax(ratioWave_log2),abs(wavemin(ratioWave_log2)))
Variable minPVar = wavemin(allTWave)
minPVar = 10 ^ (floor((log(minPVar))))
SetAxis/W=volcanoPlot bottom -maxVar,maxVar
ModifyGraph/W=volcanoPlot mode=3,marker=19,msize=2,mrkThick=0
SetDrawEnv/W=volcanoPlot xcoord= bottom,ycoord= left,dash= 3;DelayUpdate
DrawLine/W=volcanoPlot -maxVar,0.05,maxVar,0.05
SetDrawEnv/W=volcanoPlot xcoord= bottom,ycoord= left,dash= 3;DelayUpdate
DrawLine/W=volcanoPlot 0,1,0,minPVar
ModifyGraph/W=VolcanoPlot zColor(allTWave)={colorWave,0,3,cindexRGB,0,colorTableWave}
Label/W=VolcanoPlot left "P-Value"
String labelStr
if(volcanoParamWave[7] == 0)
labelStr = volcanoLabelWave[0] + " / " + volcanoLabelWave[1] + " (Log\\B2\\M)"
else
labelStr = "Difference (" + volcanoLabelWave[0] + " - " + volcanoLabelWave[1] + ")"
endif
Label/W=VolcanoPlot bottom labelStr
SetWindow VolcanoPlot, hook(modified)=thunk_hook
End
Function TableInterestingValues()
WAVE colorWave, allTWave, ratioWave, ratioWave_log2
WAVE/T SHORTNAME, NAME, PID // names of proteins hardcoded here
Duplicate/O allTWave, allTWave_log10
allTWave_log10 = -log(allTWave[p])
// manhattan distance
MatrixOp/O productWave = abs(allTWave_log10) + abs(ratioWave_log2)
Duplicate/O allTWave, so_allTWave
Duplicate/O ratioWave, so_ratioWave
Duplicate/O productWave, so_productWave
Duplicate/O colorWave, so_colorWave
Duplicate/O NAME, so_NAME
Duplicate/O SHORTNAME, so_SHORTNAME
Duplicate/O PID, so_PID
Make/O/N=(numpnts(ratioWave)) keyW=p
Duplicate/O keyW,so_keyW
Sort/R {so_colorWave,so_productWave}, so_allTWave, so_ratioWave, so_productWave, so_colorWave, so_NAME, so_SHORTNAME, so_PID, so_keyW
KillWindow/Z rankTable
Edit/N=rankTable/W=(432,45,942,734) so_NAME, so_SHORTNAME, so_PID, so_productWave, so_colorWave, so_allTWave, so_ratioWave, so_keyW
End
Function AddSignificantHitsToVolcano(vpOpt)
Variable vpOpt // 0 is original VP, 1 is filtered VP
String plotName
if(vpOpt == 0)
Wave/Z colorW = so_colorWave
Wave/Z/T shortnameW = so_SHORTNAME
plotName = "volcanoPlot"
else
Wave/Z colorW = ft_colorWave
Wave/Z/T shortnameW = ft_SHORTNAME
plotName = "ftVolcanoPlot"
endif
String labelStr = "\Z08"
Variable nRows = numpnts(colorW)
Variable i
for(i = 0; i < nRows; i += 1)
if(colorW[i] == 3)
if(strlen(shortnameW[i]) > 0)
if(strsearch(shortnameW[i],";",0) == -1)
labelStr += shortnameW[i] + "\r"
else
labelStr += StringFromList(0,shortNameW[i]) + "\r"
endif
endif
elseif(colorW[i] == 2)
break
endif
endfor
TextBox/W=$plotName/C/N=topProts/B=1/F=0/A=LT/X=0.00/Y=0.00 labelStr
End
Function MakeMeanComparison()
KillWindow/Z meanPlot
WAVE/Z meanCond1,meanCond2,colorWave,colorTableWave, volcanoParamWave
WAVE/Z/T volcanoLabelWave
if(!WaveExists(meanCond1) || !WaveExists(meanCond2))
Print "Error: no mean waves."
return -1
endif
Display/N=meanPlot/W=(944,45,1340,401) meanCond1 vs meanCond2
Variable maxVar = max(wavemax(meanCond1),wavemax(meanCond2))
Variable minVar = min(wavemin(meanCond1),wavemin(meanCond2))
if(volcanoParamWave[7] == 0)
maxVar = 10 ^ (ceil((log(maxVar))))
minVar = 10 ^ (floor((log(minVar)))) // if any Nans will this fail?
ModifyGraph/W=meanPlot log=1
else
maxVar = ceil(maxVar)
minVar = floor(minVar)
endif
SetAxis/W=meanPlot left minVar,maxVar
SetAxis/W=meanPlot bottom minVar,maxVar
ModifyGraph/W=meanPlot width={Plan,1,bottom,left}
ModifyGraph/W=meanPlot mode=3,marker=19
ModifyGraph/W=meanPlot zColor(meanCond1)={colorWave,0,3,cindexRGB,0,colorTableWave}
ModifyGraph/W=meanPlot msize=2,mrkThick=0
Label/W=meanPlot left volcanoLabelWave[0]
Label/W=meanPlot bottom volcanoLabelWave[1]
SetWindow meanPlot, hook(modified)=thunk_hook
End
STATIC Function FromVolcanoToPCA()
WAVE/Z allCond1, allCond2 // these are linear
WAVE/Z volcanoParamWave
WAVE/Z forPCA,forPCACol
KillWaves/Z forPCA,forPCACol
// we will use imputed values and we will take the ratio per replicate
// note that Volcano plot ratio is either the mean of cond1 / mean of cond2
// or the difference (subtraction) of log2 transformed data
Concatenate/O/NP=1 {allCond1,allCond2}, forPCACol
if(DimSize(allCond1,1) != DimSize(allCond2,1))
Print "Number of replicates between conditions differs. PCA is of all data, not ratios."
Concatenate/O/NP=1 {allCond1,allCond2}, forPCA
elseif(DimSize(allCond1,1) == 1)
Print "Single replicate dataset. No PCA."
Concatenate/O/NP=1 {allCond1,allCond2}, forPCA
return 0
else
if(volcanoParamWave[7] == 0)
// here we take the ratio of replicates and use the replicates for PCA, so...
MatrixOp/O forPCA = allCond1 / allCond2
else
// here we do the subtraction of replicates (log2 transformed) and use the replicates for PCA, so...
MatrixOp/O forPCA = log2(allCond1) - log2(allCond2)
// note that an alternative here would be to take Z (subtract each value from the row mean of 2nd group)
endif
endif
DoThePCA(0)
End
// this function is a way to assemble a matrix from a wavelist (of 1D data waves) to push towards PCA
// if using MaxQuant files, this option should not be needed
STATIC Function GetReadyForPCA(STRING selectedWavesList,VARIABLE baseVal)
Concatenate/O selectedWavesList, forPCA
// deal with baseVal - this uses random values even if a seed was used for other analyses
TransformImputeBaseVal(forPCA)
Duplicate/O forPCA,forPCACol
DoThePCA(1)
End
// This function will do the PCA
// Missing values (from a merge of different expts) can be dealt with in two ways using opt
Function DoThePCA(opt)
Variable opt // 1 means substitute 0 for missing values, 0 means delete whole row.
KillWindow/Z pcaPlot
KillWindow/Z pcaGroupPlot
WAVE/Z forPCA, forPCACol, volcanoParamWave
if(!WaveExists(forPCA))
DoAlert 0, "Missing a 2D wave, forPCA"
return 0
endif
Variable nProt = dimsize(forPCA,0)
Variable nRep = dimsize(forPCA,1)
Variable nRepCol = DimSize(forPCACol,1)
Variable nc
WAVE/Z colorWave, colorTableWave
if(!WaveExists(colorWave))
Make/O/N=(nProt) colorWave=0
endif
if(!WaveExists(colorTableWave))
MakeColorTableWave()
WAVE/Z colorTableWave
endif
// make a version of colorwave to display interesting proteins
Duplicate/O colorWave, colorPCAWave
// make another colorwave to show PCA columns
Make/O/N=(nRepCol) colorPCAColWave = p
colorPCAColWave[floor(nRepCol/2),*] += nRepCol/2
// pre-process data
MatrixOp/O forPCACol = log2(forPCACol)
if(volcanoParamWave[7] == 0)
// log2 transform
MatrixOp/O forPCA = log2(forPCA)
else
// do nothing the difference is already transformed
endif
// we have to deal with missing data
if(opt == 1)
// in case we have NaN or Inf - change to 0
forPCA[][] = (numtype(forPCA[p][q]) == 2 || numtype(forPCA[p][q]) == 1) ? 0 : forPCA[p][q]
forPCACol[][] = (numtype(forPCACol[p][q]) == 2 || numtype(forPCACol[p][q]) == 1) ? 0 : forPCACol[p][q]
else
// in case we have NaN or Inf - delete whole row
forPCA[][] = (numtype(forPCA[p][q]) == 2 || numtype(forPCA[p][q]) == 1) ? NaN : forPCA[p][q]
// set row with 1 or more NaN to all NaN
MatrixOp/O/FREE delW = sumrows(forPCA) / sumrows(forPCA)
forPCA[][] = forPCA[p][q] * delW[p]
MatrixOp/O forPCA = zapnans(forPCA)
nc = numpnts(forPCA) / nRep
MatrixOp/O forPCA = redimension(forPCA,nc,nRep)
// treat the colorPCAWave the same
colorPCAWave[] = colorPCAWave[p] * delW[p]
WaveTransform zapnans colorPCAWave
// now do column wave
forPCACol[][] = (numtype(forPCACol[p][q]) == 2 || numtype(forPCACol[p][q]) == 1) ? NaN : forPCACol[p][q]
// set row with 1 or more NaN to all NaN
MatrixOp/O/FREE delW = sumrows(forPCACol) / sumrows(forPCACol)
forPCACol[][] = forPCACol[p][q] * delW[p]
MatrixOp/O forPCACol = zapnans(forPCACol)
nc = numpnts(forPCACol) / nRepCol
MatrixOp/O forPCACol = redimension(forPCACol,nc,nRepCol)
endif
// centre the data
MatrixOp/O/FREE forPCARow = SubtractMean(forPCA,2)
// do the PCA for Rows, SRMT flag is needed to get M_R
PCA/ALL/SRMT forPCARow
WAVE/Z M_R
Duplicate/O M_R, M_RRow
// M_R is inverse compared to the output from SIMCA-P+
M_RRow *= -1
// and now for columns (we transpose and use rows
MatrixOp/O/FREE forPCACol = SubtractMean(forPCACol^t,2)
PCA/ALL/SRMT forPCACol
WAVE/Z M_R
Duplicate/O M_R, M_RCol
// graphs
// display PC1 and PC2
Display/N=pcaPlot/W=(36,757,431,965) M_RRow[][1] vs M_RRow[][0]
// find min and max for PC1 and PC2 combined
WaveStats/Q/RMD=[][0,1] M_RRow // we want min max of this not of forPCA
SetAxis/W=pcaPlot left V_min,V_max
SetAxis/W=pcaPlot bottom V_min,V_max
ModifyGraph/W=pcaPlot mode=3,marker=19,mrkThick=0
ModifyGraph/W=pcaPlot zColor(M_RRow)={colorPCAWave,0,3,ctableRGB,0,colorTableWave}
ModifyGraph/W=pcaPlot zmrkSize(M_RRow)={colorPCAWave,0,1,1,1.5}
ModifyGraph/W=pcaPlot zero=4,mirror=1
Label/W=pcaPlot left "PC2"
Label/W=pcaPlot bottom "PC1"
ModifyGraph/W=pcaPlot height={Plan,1,left,bottom}
// now the other
Display/N=pcaGroupPlot/W=(86,807,481,1105) M_RCol[][1] vs M_RCol[][0]
// find min and max for PC1 and PC2 combined
WaveStats/Q/RMD=[][0,1] M_RCol // we want min max of this not of forPCA
SetAxis/W=pcaGroupPlot left V_min,V_max
SetAxis/W=pcaGroupPlot bottom V_min,V_max
ModifyGraph/W=pcaGroupPlot mode=3,marker=8
ModifyGraph/W=pcaGroupPlot zColor(M_RCol)={colorPCAColWave,*,*,Rainbow,0}
ModifyGraph/W=pcaGroupPlot zero=4,mirror=1
Label/W=pcaGroupPlot left "PC2"
Label/W=pcaGroupPlot bottom "PC1"
ModifyGraph/W=pcaGroupPlot height={Plan,1,left,bottom}
// removing this because the thunk labels might not align after row deletions!
// SetWindow pcaPlot, hook(modified)=thunk_hook
End
// add labels to volcano plot to indicate the top x proteins
Function LabelTopXProts(numProteins)
Variable numProteins
if(strlen(WinList("volcanoPlot",";","WIN:1")) < 1)
DoAlert 0, "Make Volcano Plot first"
return -1
endif
WAVE/Z so_keyW
Make/O/N=(numProteins) topTenPos = so_KeyW[p]
WAVE/Z/T so_SHORTNAME
Make/O/N=(numProteins)/T topTenLabel = so_SHORTNAME[p]
Variable i
for(i = 0; i < numProteins; i += 1)
Tag/A=LB/C/N=$("top"+num2str(i))/B=1/F=0/S=3/V=1/L=0/X=1/Y=1/W=volcanoPlot allTWave, topTenPos[i], topTenLabel[i]
endfor
End
// disabling this function because the count comparison function has been deleted
// could be used to highlight proteins on another plot??
//// use a semi-colon separated list of protein names to highlight
//Function HighlightTheseProteins(myList)
// String myList
//
// WAVE/Z highlightWave
// if(!WaveExists(highlightWave))
// WAVE/Z colorWave
// Duplicate/O colorWave, highlightWave
// endif
//
// WAVE/Z/T SHORTNAME
// Variable nProteins = ItemsInList(myList)
// highlightWave = 0
// String proteinName
//
// Variable i
//
// for(i = 0; i < nProteins; i += 1)
// proteinName = StringFromList(i, myList)
// FindValue/TEXT=proteinName/TXOP=2 SHORTNAME
// highlightWave[V_Value] = 3
// endfor
//End
STATIC Function MakeTheLayout()
KillWindow/Z summaryLayout
NewLayout/N=summaryLayout
AppendLayoutObject/W=summaryLayout graph meanPlot
AppendLayoutObject/W=summaryLayout graph pcaPlot
AppendLayoutObject/W=summaryLayout graph volcanoPlot
AppendLayoutObject/W=summaryLayout graph pcaGroupPlot
LayoutPageAction/W=summaryLayout size(-1)=(595, 842), margins(-1)=(18, 18, 18, 18)
ModifyLayout/W=summaryLayout units=0
ModifyLayout/W=summaryLayout frame=0,trans=1
ModifyLayout/W=summaryLayout left(meanPlot)=322,top(meanPlot)=21,width(meanPlot)=258,height(meanPlot)=222
ModifyLayout/W=summaryLayout left(pcaPlot)=322,top(pcaPlot)=244,width(pcaPlot)=260,height(pcaPlot)=224
ModifyLayout/W=summaryLayout left(pcaGroupPlot)=322,top(pcaGroupPlot)=484,width(pcaGroupPlot)=260,height(pcaGroupPlot)=224
ModifyLayout/W=summaryLayout left(volcanoPlot)=21,top(volcanoPlot)=21,height(volcanoPlot)=450,width(volcanoPlot)=320
End
Function SaveTheLayout()
WAVE/Z/T volcanoLabelWave
if(!WaveExists(volcanoLabelWave))
DoAlert 0, "Run Volcano Plot first."
return -1
endif
// check that DiskFolder path exists and create if not
PathInfo DiskFolder
if(V_flag == 0)
NewPath DiskFolder
endif
String fileName = "summaryLayout_" + volcanoLabelWave[0] + "vs" + volcanoLabelWave[1] + ".pdf"
SavePICT/O/WIN=summaryLayout/P=DiskFolder/E=-2/W=(0,0,0,0) as fileName
fileName = "summaryLayout_" + volcanoLabelWave[0] + "vs" + volcanoLabelWave[1] + ".png"
SavePICT/O/WIN=summaryLayout/P=DiskFolder/E=-5/RES=300 as fileName
End
Function SaveTheTable()
WAVE/Z/T volcanoLabelWave
if(!WaveExists(volcanoLabelWave))
DoAlert 0, "Run Volcano Plot first."
return -1
endif
// check that DiskFolder path exists and create if not
PathInfo DiskFolder
if(V_flag == 0)
NewPath DiskFolder
endif
String fileName = "rankTable_" + volcanoLabelWave[0] + "vs" + volcanoLabelWave[1] + ".txt"
Save/B/J/M="\n"/P=DiskFolder/W "so_NAME;so_SHORTNAME;so_PID;so_productWave;so_colorWave;so_allTWave;so_ratioWave;so_keyW;" as fileName
End
Function SaveUnsortedDataMeans()
WAVE/Z/T volcanoLabelWave
if(!WaveExists(volcanoLabelWave))
DoAlert 0, "Run Volcano Plot first."
return -1
endif
// check that DiskFolder path exists and create if not
PathInfo DiskFolder
if(V_flag == 0)
NewPath DiskFolder
endif
String fileName = "withMeans_" + volcanoLabelWave[0] + "vs" + volcanoLabelWave[1] + ".txt"
Save/B/J/M="\n"/P=DiskFolder/W "NAME;SHORTNAME;PID;productWave;colorWave;allTWave;ratioWave;meanCond1;meanCond2;" as fileName
End
Function SaveForProteore()
WAVE/Z/T volcanoLabelWave
if(!WaveExists(volcanoLabelWave))
DoAlert 0, "Run Volcano Plot first."
return -1
endif
// check that DiskFolder path exists and create if not
PathInfo DiskFolder
if(V_flag == 0)
NewPath DiskFolder
endif
String fileName
WAVE/Z/T so_SHORTNAME,so_NAME,so_PID
WAVE/Z so_colorWAVE
// any semi-colon separated lists must be quoted
Duplicate/O/T so_SHORTNAME, proteore1
Duplicate/O/T so_NAME, proteore2
Duplicate/O/T so_PID, proteore3
String wList = WaveList("proteore*",";","")
Variable nWaves = ItemsInList(wList)
Variable i
for(i = 0; i < nWaves; i += 1)
Wave/T tw = $(StringFromList(i, wList))
tw[] = SelectString(ItemsInList(tw[p]) < 2, "\"" + tw[p] + "\"", tw[p])
endfor
// make "cluster wave"
Make/O/N=(numpnts(so_colorWave))/T proteore4
Wave/T proteore4
proteore4[] = SelectString(so_colorWave[p] == 3, "below", "enriched")
// save file
fileName = "rankTable_" + volcanoLabelWave[0] + "vs" + volcanoLabelWave[1] + "all.txt"
Save/O/B/J/M="\n"/P=DiskFolder "proteore1;proteore2;proteore3;proteore4;" as fileName
// now export a list of only the enriched ones
WaveStats/Q so_colorWave
if(V_Max < 3)
Print "No enrichment, so no text file saved."
return 0
endif
FindLevel/P/Q so_colorWave, 2
if(V_flag == 1)
FindLevel/P/Q so_colorWave, 1
if(V_flag == 1)
FindLevel/P/Q so_colorWave, 0
endif
endif
Duplicate/O/T/RMD=[0,V_levelX - 1] proteore1,proteore5
Duplicate/O/T/RMD=[0,V_levelX - 1] proteore2,proteore6
Duplicate/O/T/RMD=[0,V_levelX - 1] proteore3,proteore7
Duplicate/O/T/RMD=[0,V_levelX - 1] proteore4,proteore8
fileName = "rankTable_" + volcanoLabelWave[0] + "vs" + volcanoLabelWave[1] + "enriched.txt"
Save/O/B/J/M="\n"/P=DiskFolder "proteore5;proteore6;proteore7;proteore8;" as fileName
// tidy up
wList = WaveList("proteore*",";","")
nWaves = ItemsInList(wList)
for(i = 0; i < nWaves; i += 1)
Wave/T tw = $(StringFromList(i, wList))
KillWaves/Z tw
endfor
End
// This function will load an output from UniProt.
// Using a list of gene names from VolcanoPlot output (via copy/paste), query in UniProt ID mapping
// select columns for Gene ontology (cellular component) & Subcellular location [CC]
Function LoadUniprot()
if(!DataFolderExists("root:data"))
NewDataFolder/O root:data
endif
NewDataFolder/O/S root:data:uniprot
// load the tab-separated output downloaded from UniProt
LoadWave/A/D/J/K=0/L={0,0,0,0,0}/V={"\t","",0,0}/W/O/Q ""
if (strlen(S_Path) == 0) // user pressed cancel
return -1
endif
SetDataFolder root:
return 0
End
////-- Below here are functions that label existing volcano plot outputs according to GO Terms
Function UniprotTable()
WAVE/Z/T SHORTNAME
Duplicate/O/T SHORTNAME, clickMe
DeleteEmptyCellsFromTextWave(clickMe)
KillWindow/Z closeTableWhenFinished
Edit/N=closeTableWhenFinished/K=1 clickMe as "Close this table when finished"
DoAlert 0, "Click `clickMe` to select the column.\rCopy data to clipboard and use at:\rhttps://www.uniprot.org/uploadlists"
End
Function MatchResultsAndUniprot()
// first we need to check that we have results and uniprot data
SetDataFolder root:
WAVE/Z/T SHORTNAME
if(!WaveExists(SHORTNAME))
Print "Proteomics data not loaded"
return -1
endif
Wave/Z/T Gene_names = root:data:uniprot:Gene_names
Wave/Z/T GOw = root:data:uniprot:Gene_ontology__cellular_component_
Wave/Z/T SCw = root:data:uniprot:Subcellular_location__CC_
if(!WaveExists(Gene_names) || !WaveExists(GOw) || !WaveExists(SCw))
Print "Uniprot data not loaded"
return -1
endif
Variable nHits = numpnts(SHORTNAME)
Variable nGenes = numpnts(Gene_names)
// first make a textwave of Gene_names where the alternatives are semi-colon separated
Duplicate/O/FREE/T Gene_Names, gnW
gnW[] = ReplaceString(" ",gnW[p],";") + ";"
// make the wave to hold the GO terms and subcellular info
Make/O/T/N=(nHits) GO_Terms, SC_Terms
String sName, gNameList
Variable matchVar