SOV(segment overlap)是測量預測蛋白質二級體結構的一種方法
跟Q3的方法不同的是以片段的觀念比較
以下用例子說明:
假設有一條蛋白質結構為:CCHHHHHCCCCCCCHHHHHHHHCHHHHHHHHCCCC
預測出來的結果為: CCHCHCHCCCCCCCCHCHCHCHCHHHCHHHHCCCC
以Q3來測量為 28 / 35 = 80%(一般而言Q3大於70%就是相當不錯的預測結果)
但實際上蛋白質二級體結構並不會出現單一helix這種結構
用Q3來測量並不能符合需求,所以才產生SOV的方法。
詳細的SOV的公式和介紹請參考:
1. A Modified Definition of Sov, a Segment-Based Measure for Protein Secondary Structure Prediction Assessment. (Adam Zemla et al., PROTEINS: Structure, Function, and Genetics 34:220-223, 1999)
2.EVA measures for secondary structure prediction accuracy
SOV和Q3都是常見到的方法,但網路上似乎沒有找到有人寫好的Code(或許是我找不到吧),雖然有一個server(SOV - segment overlap measure)提供SOV的計算但它有長度的限制,所以我有寫一個SOV JAVA的class,給各位參考一下,有需要的人也可以拿去用,但小弟是JAVA coding的新手,沒有寫的很好請多多包涵。
使用方法
SOV sov = new SOV(observedString, predictedString);
sov.getScore();
Code:
/**
 * Segment OVerlap (SOV) score for a predicted protein secondary structure,
 * following the 1999 definition by Zemla et al.
 *
 * <p>Both sequences are strings over the three-state alphabet {@code H} (helix),
 * {@code E} (strand) and {@code C} (coil); any other character is ignored.
 * For every state, each observed segment {@code s1} is compared with every
 * predicted segment {@code s2} of the same state that shares at least one
 * residue with it:
 *
 * <pre>
 *   SOV = 100 / N * sum( (minov + delta) / maxov * len(s1) )
 * </pre>
 *
 * where {@code minov} is the length of the intersection of the two segments,
 * {@code maxov} the length of their union, and {@code N} is the sum of
 * {@code len(s1)} over all overlapping pairs plus the length of every observed
 * segment that has no overlapping predicted segment.
 *
 * <p>Usage:
 * <pre>
 *   SOV sov = new SOV(observedString, predictedString);
 *   double score = sov.getScore();
 * </pre>
 */
public class SOV {

    /** Secondary-structure states that take part in the score. */
    private static final char[] STATES = {'H', 'E', 'C'};

    /** Observed (reference) secondary-structure string. */
    private final String observed;

    /** Predicted secondary-structure string. */
    private final String predicted;

    /**
     * Creates a scorer for one observed/predicted pair.
     *
     * @param observedSeq  the observed secondary structure, one character per residue
     * @param predictedSeq the predicted secondary structure, one character per residue
     * @throws NullPointerException     if either argument is {@code null}
     * @throws IllegalArgumentException if the two sequences differ in length
     */
    public SOV(String observedSeq, String predictedSeq) {
        if (observedSeq == null || predictedSeq == null) {
            throw new NullPointerException("observedSeq and predictedSeq must not be null");
        }
        if (observedSeq.length() != predictedSeq.length()) {
            throw new IllegalArgumentException(
                    "sequence lengths differ: observed=" + observedSeq.length()
                            + ", predicted=" + predictedSeq.length());
        }
        this.observed = observedSeq;
        this.predicted = predictedSeq;
    }

    /**
     * Computes the SOV score over all three states.
     *
     * @return the score as a percentage in the range 0..100, or {@code 0.0}
     *         when the observed sequence contains no H/E/C residue at all
     */
    public double getScore() {
        double weightedOverlap = 0.0;
        long normalization = 0;

        for (char state : STATES) {
            int[][] observedSegments = findSegments(observed, state);
            int[][] predictedSegments = findSegments(predicted, state);

            for (int[] obs : observedSegments) {
                int obsLength = obs[1] - obs[0] + 1;
                boolean overlapped = false;

                for (int[] pre : predictedSegments) {
                    // Two closed intervals intersect unless one lies entirely
                    // before the other. This also covers a predicted segment
                    // that strictly contains the observed one.
                    if (pre[0] > obs[1] || pre[1] < obs[0]) {
                        continue;
                    }
                    overlapped = true;

                    int minov = Math.min(obs[1], pre[1]) - Math.max(obs[0], pre[0]) + 1;
                    int maxov = Math.max(obs[1], pre[1]) - Math.min(obs[0], pre[0]) + 1;
                    int preLength = pre[1] - pre[0] + 1;

                    weightedOverlap +=
                            (double) (minov + delta(maxov, minov, obsLength, preLength))
                                    / maxov * obsLength;
                    normalization += obsLength;
                }

                // An observed segment without any predicted counterpart still
                // counts towards N, so a missed segment lowers the score.
                if (!overlapped) {
                    normalization += obsLength;
                }
            }
        }

        if (normalization == 0) {
            return 0.0; // nothing to score; avoids 0/0 = NaN
        }
        return 100.0 / normalization * weightedOverlap;
    }

    /**
     * Locates every maximal run of {@code state} in {@code sequence}.
     *
     * @return one {@code {start, end}} pair (0-based, inclusive) per run,
     *         in left-to-right order; empty when the state never occurs
     */
    private static int[][] findSegments(String sequence, char state) {
        int length = sequence.length();

        // First pass: count the runs so the result array can be sized exactly.
        int runs = 0;
        for (int i = 0; i < length; i++) {
            if (sequence.charAt(i) == state && (i == 0 || sequence.charAt(i - 1) != state)) {
                runs++;
            }
        }

        int[][] segments = new int[runs][];
        int next = 0;
        int i = 0;
        while (i < length) {
            if (sequence.charAt(i) != state) {
                i++;
                continue;
            }
            int start = i;
            while (i < length && sequence.charAt(i) == state) {
                i++;
            }
            segments[next++] = new int[] {start, i - 1};
        }
        return segments;
    }

    /**
     * The allowance term of the SOV definition:
     * {@code min(maxov - minov, minov, len(s1) / 2, len(s2) / 2)} using integer division.
     */
    private static int delta(int maxov, int minov, int s1Length, int s2Length) {
        int byOverlap = Math.min(maxov - minov, minov);
        int byLength = Math.min(s1Length / 2, s2Length / 2);
        return Math.min(byOverlap, byLength);
    }
}
