% HydroCorr: Find the best hydrophobicity correlation
% fit between query strings and templates.
%
% Scott F. Smith
% Department of Electrical and Computer Engineering
% Boise State University
% SFSmith@BoiseState.edu
%
% 19 January 2005
%
% Outputs (left in the workspace):
%   Scores     - 1 x NumbSeq vector, best normalized correlation found for
%                each input sequence over all templates and all offsets.
%   mean_score - mean of Scores (displayed).
%   sd_score   - standard deviation of Scores (displayed).

% Read templates (WD40, LRR, TPR, ANK, GP120,
% RVT, ZFC2H2, RVP, CytocromBN, COX, Oxidored, CytochromBC,
% ABCtran, RuBisCO, RuBisCON, Pkinase, PPR, RVTthumb, HCV, 7tm).
% The template script is expected to define the row vectors
% t_all, t_90, t_80, ..., t_10, t_0 used below.
TemplatesPPR

% Templates are tried in this order; the best score over all of them wins.
Templates = {t_all, t_90, t_80, t_70, t_60, t_50, t_40, t_30, t_20, t_10, t_0};

% Residue letters and their hydrophobicity values (the numbers match the
% Kyte-Doolittle hydropathy scale). Any character not listed scores 0.
Residues   = 'ACDEFGHIKLMNPQRSTVWY';
HydroValue = [ 1.8  2.5 -3.5 -3.5  2.8 -0.4 -3.2  4.5 -3.9  3.8 ...
               1.9 -3.5 -1.6 -3.5 -4.5 -0.8 -0.7  4.2 -0.9 -1.3];
HydroScale = 4.5;   % largest magnitude in the table; scales values to [-1, 1]

% Read in sequences for comparison.
% Only characters from column 24 onward are used; the first 23 columns of
% each line are skipped (presumably a sequence-name field - confirm against
% the data file format).
SequenceFile = 'Data/WD40/plain.txt';
FirstColumn  = 24;

[fileIn, openMsg] = fopen(SequenceFile);
if fileIn == -1
    % Fail with a clear message instead of a cryptic fgetl error.
    error('HydroCorr:fileOpen', 'Cannot open %s: %s', SequenceFile, openMsg);
end

SeqLines = {};
while 1
    FileLine = fgetl(fileIn);
    if ~ischar(FileLine)
        break   % fgetl returns -1 at end of file
    end
    if length(FileLine) >= FirstColumn
        SeqLines{end+1} = FileLine(FirstColumn:length(FileLine)); %#ok<SAGROW>
    end
    % Lines with nothing at or after FirstColumn carry no residues and are
    % skipped.
end
fclose(fileIn);

% char() pads shorter rows with trailing blanks, so ragged input no longer
% aborts the run; blanks are removed as gaps below, so padding is harmless.
Sequences = upper(char(SeqLines));
[NumbSeq, NumbPos] = size(Sequences);

Scores = zeros(1, NumbSeq);
for seq = 1:NumbSeq

    % For each sequence, strip out gaps ('.' and blank)
    TempSeq = Sequences(seq,:);
    TestSeq = TempSeq((TempSeq ~= '.') & (TempSeq ~= ' '));
    TestPos = length(TestSeq);

    % Convert sequence to hydrophobicities, scaled by HydroScale
    [IsKnown, ResidueIdx] = ismember(TestSeq, Residues);
    TestHydro = zeros(1, TestPos);
    TestHydro(IsKnown) = HydroValue(ResidueIdx(IsKnown));
    TestHydro = TestHydro / HydroScale;

    % Find maximum correlation between the test sequence and each template
    BestMaxCorrel = -Inf;
    for t = 1:length(Templates)
        Template = Templates{t};
        TemplatePos = length(Template);
        MaxCorrel = -99999;
        if TestPos > TemplatePos
            % Sequence is longer: slide the template along the sequence
            numb_attempts = 1+TestPos-TemplatePos;
            for i = 1:numb_attempts
                correl = TestHydro(i:TemplatePos+i-1)*Template.';
                MaxCorrel = max([MaxCorrel correl]);
            end
        else
            % Template is longer (or equal): slide the sequence along it
            numb_attempts = 1+TemplatePos-TestPos;
            for i = 1:numb_attempts
                correl = TestHydro*Template(i:TestPos+i-1).';
                MaxCorrel = max([MaxCorrel correl]);
            end
        end
        % Scale down by the square root of the number of offsets tried
        % (presumably to compensate for taking the maximum over more
        % candidate alignments - confirm with the author's write-up).
        MaxCorrel = MaxCorrel / sqrt(numb_attempts);
        BestMaxCorrel = max([BestMaxCorrel MaxCorrel]);
    end

    Scores(seq) = BestMaxCorrel;
end

% Deliberately unsuppressed so the summary statistics are displayed
mean_score = mean(Scores)
sd_score = std(Scores)