% ScoreSingle: Find the mean and standard deviation of
%              the scores of all the members of a protein-domain family
%              against a series of templates for another or the same
%              protein-domain family.  Use two measures in the
%              score calculation.
%
% Scott F. Smith
% Department of Electrical and Computer Engineering
% Boise State University
% SFSmith@BoiseState.edu
%
% 19 February 2005
%
% Inputs (read from disk, relative to the current directory):
%   MeasuresHydro1 / MeasuresVolume  scripts that set the variable 'measures'
%   TemplatesHydro1/templates7tm     script that sets t_all, t_90, ..., t_0
%   TemplatesVolume/Templates7tm     script that sets t_all, t_90, ..., t_0
%   Data/7tm.txt                     one sequence per line; residues start
%                                    at column 24
%
% Outputs (left in the workspace and displayed):
%   Scores      best normalised correlation for each sequence
%   mean_score  mean of Scores
%   sd_score    standard deviation of Scores

% Choose two sets of twenty measure values for amino acids
% Choices are: MeasuresHydro1 (Kyte and Doolittle)
%              MeasuresHydro2 (Engelman, Steitz, and Goldman)
%              MeasuresVolume (Zamyatin)
%              MeasuresCharge
% NOTE(review): assuming each Measures* script assigns 'measures', the
% first-loaded set ends up in measuresB and the second-loaded set in
% measures.
MeasuresHydro1
measuresB = measures;
MeasuresVolume

% Read two sets of templates (WD40, LRR, TPR, ANK, GP120,
% RVT, ZFC2H2, RVP, CytocromBN, COX, Oxidored, CytochromBC,
% ABCtran, RuBisCO, RuBisCON, Pkinase, PPR, RVTthumb, HCV, 7tm).
% NOTE(review): the two template scripts are invoked with different
% capitalisation (templates7tm / Templates7tm); on a case-sensitive file
% system only one spelling can match the file name -- confirm.
cd('TemplatesHydro1');
templates7tm
cd('..');
Bt_all = t_all; Bt_90 = t_90; Bt_80 = t_80; Bt_70 = t_70;
Bt_60 = t_60; Bt_50 = t_50; Bt_40 = t_40; Bt_30 = t_30;
Bt_20 = t_20; Bt_10 = t_10; Bt_0 = t_0;
cd('TemplatesVolume');
Templates7tm
cd('..');

% Collect the templates in scoring order so that one loop can handle them
% all.  TemplatesA{k} is paired with 'measures', TemplatesB{k} with
% 'measuresB'.
TemplatesA = {t_all,  t_90,  t_80,  t_70,  t_60,  t_50, ...
              t_40,   t_30,  t_20,  t_10,  t_0};
TemplatesB = {Bt_all, Bt_90, Bt_80, Bt_70, Bt_60, Bt_50, ...
              Bt_40,  Bt_30, Bt_20, Bt_10, Bt_0};
NumbTemplates = length(TemplatesA);

% Read in sequences for comparison
[fileIn, openMsg] = fopen('Data/7tm.txt');
if fileIn == -1
    % Fail here with a clear message instead of inside fgetl.
    error('ScoreSingle:fileOpen', 'Cannot open Data/7tm.txt: %s', openMsg);
end
SeqLines = {};
while 1
    FileLine = fgetl(fileIn);
    if ~ischar(FileLine)
        break
    end
    % Residues start at column 24; lines with nothing there are skipped.
    Residues = FileLine(24:length(FileLine));
    if ~isempty(Residues)
        SeqLines{end+1} = Residues;
    end
end
fclose(fileIn);
% char() pads shorter rows with trailing blanks, which are removed as gaps
% below, so lines of unequal length no longer abort the run.
Sequences = upper(char(SeqLines));

[NumbSeq, NumbPos] = size(Sequences);

% Residue letters in the order used to index the measure vectors.
AminoAcids = 'ACDEFGHIKLMNPQRSTVWY';

Scores = zeros(1, NumbSeq);
for seq = 1:NumbSeq
    % Progress indicator: print a dot at each fifth of the way through.
    for fifth = 1:4
        if seq == floor(fifth*NumbSeq/5)
            disp('.')
        end
    end

    % For each sequence, strip out gaps ('.' and blanks)
    TempSeq = Sequences(seq,:);
    TestSeq = TempSeq((TempSeq ~= '.') & (TempSeq ~= ' '));
    TestPos = length(TestSeq);

    % Convert sequence to the two measure profiles.  Characters that are
    % not one of the twenty residue letters keep the value 0.
    [IsKnown, AcidIdx] = ismember(TestSeq, AminoAcids);
    TestMeasA = zeros(1, TestPos);
    TestMeasB = zeros(1, TestPos);
    TestMeasA(IsKnown) = measures(AcidIdx(IsKnown));
    TestMeasB(IsKnown) = measuresB(AcidIdx(IsKnown));

    % Find the best normalised maximum correlation over all templates
    for k = 1:NumbTemplates
        % A template with the same length as the previous one in the list
        % is not scored again (the first template is always scored).
        if k > 1 && length(TemplatesA{k}) == length(TemplatesA{k-1})
            continue
        end
        TemplateA = TemplatesA{k};
        TemplateB = TemplatesB{k};
        TemplatePos = length(TemplateA);

        MaxCorrel = -99999;
        if TestPos > TemplatePos
            % Slide the template along the longer test sequence.
            numb_attempts = 1 + TestPos - TemplatePos;
            for i = 1:numb_attempts
                window = i:TemplatePos+i-1;
                correl = TestMeasA(window)*TemplateA.' ...
                       + TestMeasB(window)*TemplateB.';
                MaxCorrel = max([MaxCorrel correl]);
            end
        else
            % Slide the test sequence along the longer (or equal) template.
            numb_attempts = 1 + TemplatePos - TestPos;
            for i = 1:numb_attempts
                window = i:TestPos+i-1;
                correl = TestMeasA*TemplateA(window).' ...
                       + TestMeasB*TemplateB(window).';
                MaxCorrel = max([MaxCorrel correl]);
            end
        end

        % Scale by the square root of the number of alignments tried.
        MaxCorrel = MaxCorrel / sqrt(numb_attempts);

        if k == 1
            BestMaxCorrel = MaxCorrel;
        else
            BestMaxCorrel = max([BestMaxCorrel MaxCorrel]);
        end
    end

    Scores(seq) = BestMaxCorrel;
end

% No trailing semicolons: the two results are displayed.
mean_score = mean(Scores)
sd_score = std(Scores)