%%% Specify CM parameter file and database file fileIn = fopen('U12_CM.txt'); fileDataIn = fopen('Database.txt'); %%% U12 structure % Node numbers associated with consensus sequence ordered left to right (5' to 3') EmitNodes = 1 + [1:2 10:14 15:24 14:-1:10 27:34 37:44 45:50 ... 44:-1:37 53:57 58:64 57:-1:53 67:77 80:86 87:89 91:97 ... 98:102 97:-1:91 90 86:-1:80 105:106 107:115 116:124 115:-1:107 5:-1:3]; % Node type for each emit node: 0=single, 1=left-pair, 2=right-pair EmitTypes = [zeros(size(1:2)) ones(size(10:14)) zeros(size(15:24)) ... 2*ones(size(14:-1:10)) zeros(size(27:34)) ones(size(37:44)) zeros(size(45:50)) ... 2*ones(size(44:-1:37)) ones(size(53:57)) zeros(size(58:64)) 2*ones(size(57:-1:53)) ... zeros(size(67:77)) ones(size(80:86)) zeros(size(87:89)) ones(size(91:97)) ... zeros(size(98:102)) 2*ones(size(97:-1:91)) zeros(size(90)) 2*ones(size(86:-1:80)) ... zeros(size(105:106)) ones(size(107:115)) zeros(size(116:124)) ... 2*ones(size(115:-1:107)) zeros(size(5:-1:3))]; % Location in consensus sequence of right member of pair given left member location RightPairLoc = [zeros(size(1:2)) 22:-1:18 zeros(size(15:24)) ... zeros(size(14:-1:10)) zeros(size(27:34)) 52:-1:45 zeros(size(45:50)) ... zeros(size(44:-1:37)) 69:-1:65 zeros(size(58:64)) zeros(size(57:-1:53)) ... zeros(size(67:77)) 109:-1:103 zeros(size(87:89)) 117:-1:111 ... zeros(size(98:102)) zeros(size(97:-1:91)) zeros(size(90)) zeros(size(86:-1:80)) ... zeros(size(105:106)) 146:-1:138 zeros(size(116:124)) ... zeros(size(115:-1:107)) zeros(size(5:-1:3))]; %%% Read CM parameter file NodeType = zeros(1, 126); NodeIndex = NodeType; NodeCount = 0; MatchPairScores = zeros(16, 126); MatchSingleScores = zeros(4, 126); while 1 FileLine = fgetl(fileIn); if ~ischar(FileLine) break end if length(find(FileLine == '[')) > 0 [RemoveToken, RemainToken] = strtok(FileLine); [RemoveToken, RemainToken] = strtok(RemainToken); [StrNodeNum, RemainToken] = strtok(RemainToken); NodeNum = str2num(StrNodeNum) + 1; NodeCount = NodeCount + 1; NodeIndex(NodeCount) = NodeNum; if length(findstr(FileLine, 'MATL')) > 0 NodeType(NodeNum) = 1; end if length(findstr(FileLine, 'MATR')) > 0 NodeType(NodeNum) = 2; end if length(findstr(FileLine, 'MATP')) > 0 NodeType(NodeNum) = 3; end if length(findstr(FileLine, 'BIF')) > 0 NodeType(NodeNum) = 4; end if length(findstr(FileLine, 'BEGL')) > 0 NodeType(NodeNum) = 5; end if length(findstr(FileLine, 'BEGR')) > 0 NodeType(NodeNum) = 6; end if length(findstr(FileLine, 'ROOT')) > 0 NodeType(NodeNum) = 7; end if length(findstr(FileLine, 'END')) > 0 NodeType(NodeNum) = 8; end else [RemoveToken, RemainToken] = strtok(FileLine); NumericValues = str2num(RemainToken); if length(findstr(FileLine, 'MP')) > 0 ScoresStart = NumericValues(5) + 6; MatchPairScores(:, NodeNum) = NumericValues(ScoresStart:ScoresStart+15); end if length(findstr(FileLine, 'ML')) > 0 ScoresStart = NumericValues(5) + 6; MatchSingleScores(:, NodeNum) = NumericValues(ScoresStart:ScoresStart+3); end if length(findstr(FileLine, 'MR')) > 0 ScoresStart = NumericValues(5) + 6; MatchSingleScores(:, NodeNum) = NumericValues(ScoresStart:ScoresStart+3); end end end fclose(fileIn); %%% Read database file CharDatabase = []; TrueLocs = []; while 1 FileLine = fgetl(fileDataIn); if ~ischar(FileLine) break end if FileLine(1) ~= '>' CharDatabase = [CharDatabase FileLine]; else if FileLine(2) == '*' TrueLocs = [TrueLocs length(CharDatabase)+1]; end end end fclose(fileDataIn); CharDatabase = upper(CharDatabase); Database = ones(size(CharDatabase)); for i = 1:length(CharDatabase) if CharDatabase(i) == 'C' Database(i) = 2; end if CharDatabase(i) == 'G' Database(i) = 3; end if CharDatabase(i) == 'U' Database(i) = 4; end if CharDatabase(i) == 'T' Database(i) = 4; end end %%% Find ungapped scores UngapScores = zeros(1, length(Database)+1-length(EmitNodes)); for i = 1:length(Database)+1-length(EmitNodes) Score = 0; for j = 1:length(EmitNodes) if EmitTypes(j) == 0 Score = Score + MatchSingleScores(Database(i+j-1), EmitNodes(j)); end if EmitTypes(j) == 1 PairIndex = Database(i+RightPairLoc(j)-1) + 4*(Database(i+j-1)-1); Score = Score + MatchPairScores(PairIndex, EmitNodes(j)); end end UngapScores(i) = Score; end %%% Select 100 best ungapped score locations as initial population [PopScores, PopLocs] = sort(UngapScores, 'descend'); PopScores = PopScores(1:100); PopLocs = PopLocs(1:100); NewScores = zeros(1,70); %%% Initialize all representations to ungapped PopReps = ones(100, length(EmitNodes)+1); PopReps(:,length(EmitNodes)+1) = PopLocs; NewPreps = ones(70, length(EmitNodes)+1); %%% Run GA for gen = 1:1000 % Find elite EliteScores = PopScores(1); EliteReps = PopReps(1,:); for i = 2:100 differs = 1; for j = 1:length(EliteScores) if abs(PopReps(i,length(EmitNodes)+1)-EliteReps(j,length(EmitNodes)+1)) < 3 differs = 0; end end if differs == 1 EliteScores = [EliteScores PopScores(i)]; EliteReps = [EliteReps; PopReps(i,:)]; end end EliteScores = [EliteScores PopScores]; EliteReps = [EliteReps; PopReps]; EliteScores = EliteScores(1:30); EliteReps = EliteReps(1:30,:); % Crossover for i = 1:35 CrossPoint = 1 + floor((length(EmitNodes)-2)*rand); CrossFirst = 1 + floor(100*rand); CrossPair = 1 + floor(100*rand); NewReps(i,:) = [PopReps(CrossFirst,1:CrossPoint) PopReps(CrossPair,CrossPoint+1:length(EmitNodes)+1)]; end % Mutate for i = 1:35 MutateInd = 1 + floor(100*rand); MutPoint = 1 + floor((length(EmitNodes)-1)*rand); MutDelta = 2*round(rand) - 1; NewReps(i+35,:) = PopReps(MutateInd,:); if NewReps(i+35,MutPoint)+MutDelta > -1 NewReps(i+35,MutPoint) = NewReps(i+35,MutPoint) + MutDelta; if rand > 0.5 NewReps(i+35,length(EmitNodes)+1) = NewReps(i+35,length(EmitNodes)+1) - MutDelta; end end end % Score new representations for i = 1:70 Score = 0; for j = 1:length(EmitNodes) NetPos = sum(NewReps(i,1:j)); Loc = NewReps(i,length(EmitNodes)+1); if EmitTypes(j) == 0 if Loc+NetPos-1 < length(Database)+1 Score = Score + MatchSingleScores(Database(Loc+NetPos-1), EmitNodes(j)); end end if EmitTypes(j) == 1 RightNetPos = sum(NewReps(i,1:RightPairLoc(j))); if Loc+RightNetPos-1 < length(Database)+1 PairIndex = Database(Loc+RightNetPos-1) + ... 4*(Database(Loc+NetPos-1)-1); end Score = Score + MatchPairScores(PairIndex, EmitNodes(j)); end end NewScores(i) = Score; end PopScores = [EliteScores NewScores]; PopReps = [EliteReps; NewReps]; % Sort scores and representations [PopScores, PopLocs] = sort(PopScores, 'descend'); PopReps = PopReps(PopLocs,:); end