Repeated DNA Sequences Total Accepted: 8816 Total Submissions: 47451 My Submissions Question Solution
All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: “ACGAATTCCG”. When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example,
Given s = “AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT”,
Return:
[“AAAAACCCCC”, “CCCCCAAAAA”].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | import java.util.*; public class RepeatedDNASequences { public static List<String> findRepeatedDnaSequences(String s) { ArrayList<String> result = new ArrayList<String>(); if(s== null || s.length() == 0) return result; HashMap<Character,Integer> map = new HashMap<Character,Integer>(); map.put('A',0); map.put('C',1); map.put('G',2); map.put('T',3); HashSet<Integer> hashSet = new HashSet<Integer>(); Set<Integer> unique = new HashSet<Integer>(); int temp = 0; for(int i=0;i<s.length();i++){ /*temp = 0; for(int j=0;j<10;j++){ temp = (temp<<2) + map.get(s.charAt(i+j)); }*/ if(i<9){ temp = (temp<<2) + map.get(s.charAt(i)); // System.out.println(temp); } else{ temp = (temp<<2) + map.get(s.charAt(i)); temp &= (1<<20) - 1; System.out.println(temp); if(hashSet.contains(temp) && !unique.contains(temp)){ result.add(s.substring(i-9,i+1)); unique.add(temp); } else { hashSet.add(temp); } } } return result; } public static void main(String[] args) { String s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"; System.out.println(findRepeatedDnaSequences(s)); } } |