Repeated DNA Sequences Total Accepted: 8816 Total Submissions: 47451 My Submissions Question Solution
All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: “ACGAATTCCG”. When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example,
Given s = “AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT”,
Return:
[“AAAAACCCCC”, “CCCCCAAAAA”].
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
import java.util.*;
public class RepeatedDNASequences {
public static List<String> findRepeatedDnaSequences(String s) {
ArrayList<String> result = new ArrayList<String>();
if(s== null || s.length() == 0) return result;
HashMap<Character,Integer> map = new HashMap<Character,Integer>();
map.put('A',0);
map.put('C',1);
map.put('G',2);
map.put('T',3);
HashSet<Integer> hashSet = new HashSet<Integer>();
Set<Integer> unique = new HashSet<Integer>();
int temp = 0;
for(int i=0;i<s.length();i++){
/*temp = 0;
for(int j=0;j<10;j++){
temp = (temp<<2) + map.get(s.charAt(i+j));
}*/
if(i<9){
temp = (temp<<2) + map.get(s.charAt(i));
// System.out.println(temp);
}
else{
temp = (temp<<2) + map.get(s.charAt(i));
temp &= (1<<20) - 1;
System.out.println(temp);
if(hashSet.contains(temp) && !unique.contains(temp)){
result.add(s.substring(i-9,i+1));
unique.add(temp);
}
else {
hashSet.add(temp);
}
}
}
return result;
}
public static void main(String[] args) {
String s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT";
System.out.println(findRepeatedDnaSequences(s));
}
} |
import java.util.*;
public class RepeatedDNASequences {
public static List<String> findRepeatedDnaSequences(String s) {
ArrayList<String> result = new ArrayList<String>();
if(s== null || s.length() == 0) return result;
HashMap<Character,Integer> map = new HashMap<Character,Integer>();
map.put('A',0);
map.put('C',1);
map.put('G',2);
map.put('T',3);
HashSet<Integer> hashSet = new HashSet<Integer>();
Set<Integer> unique = new HashSet<Integer>();
int temp = 0;
for(int i=0;i<s.length();i++){
/*temp = 0;
for(int j=0;j<10;j++){
temp = (temp<<2) + map.get(s.charAt(i+j));
}*/
if(i<9){
temp = (temp<<2) + map.get(s.charAt(i));
// System.out.println(temp);
}
else{
temp = (temp<<2) + map.get(s.charAt(i));
temp &= (1<<20) - 1;
System.out.println(temp);
if(hashSet.contains(temp) && !unique.contains(temp)){
result.add(s.substring(i-9,i+1));
unique.add(temp);
}
else {
hashSet.add(temp);
}
}
}
return result;
}
public static void main(String[] args) {
String s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT";
System.out.println(findRepeatedDnaSequences(s));
}
}