-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake_st.cpp
More file actions
89 lines (72 loc) · 2.51 KB
/
Copy pathmake_st.cpp
File metadata and controls
89 lines (72 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <iostream>
#include <vector>
#include <algorithm>
#include <fstream>
#include <string>
#include <sdsl/cst_sct3.hpp>
#include <sdsl/csa_wt.hpp>
const char termCharacter = '$';
char getRCBase(char base) {
switch(base) {
case 'A': return 'T';
case 'T': return 'A';
case 'C': return 'G';
case 'G': return 'C';
default : return 'N';
}
}
std::string getRCRead(const std::string& read) {
int readlen = read.size();
std::string result;
for(int i = 0; i < readlen; i++) {
result += getRCBase(read[i]);
}
reverse(result.begin(), result.end());
return result;
}
bool isValidRead(const std::string &read) {
const std::string alphabet = "ACGT";
int readlen = read.size();
for(int i = 0; i < readlen; i++){
if(std::find(alphabet.begin(), alphabet.end(), read[i]) == alphabet.end()) return false;
}
return true;
}
int main(int argc, char **argv) {
// Usage: ./make_st.out <read.fastq> <index-file-name> <thread-num>"
std::string readFilePath = std::string(argv[1]);
std::string indexFilePath = std::string(argv[2]);
std::string tmpFilePath = indexFilePath + "_read_text.tmp";
std::ifstream ifsRead(readFilePath);
std::ofstream ofsTmp(tmpFilePath);
ofsTmp << termCharacter;
std::string name, seq, blankline, qual;
while(getline(ifsRead, name)) {
getline(ifsRead, seq);
getline(ifsRead, blankline);
getline(ifsRead, qual);
if(!isValidRead(seq)) continue;
ofsTmp << seq << termCharacter;
std::string rcseq = getRCRead(seq);
ofsTmp << rcseq << termCharacter;
}
ifsRead.close();
ofsTmp.close();
std::string CSTFilePath = indexFilePath + ".cst";
// std::string CSAFilePath = indexFilePath + ".csa";
sdsl::cst_sct3<> cst;
// sdsl::csa_wt<> csa;
sdsl::memory_monitor::start();
auto start = std::chrono::high_resolution_clock::now();
sdsl::cache_config config;
construct(cst, tmpFilePath, config, 1);
store_to_file(cst, CSTFilePath);
auto stop = std::chrono::high_resolution_clock::now();
std::cout << "Finish CST. " << std::chrono::duration_cast<std::chrono::seconds>(stop-start).count() << " seconds." << std::endl;
std::cout << "Peak memory by SDSL: " << sdsl::memory_monitor::peak() / 1024 << " KB" << std::endl;
stop = std::chrono::high_resolution_clock::now();
std::cout << "Finish. " << std::chrono::duration_cast<std::chrono::seconds>(stop-start).count() << " seconds." << std::endl;
sdsl::memory_monitor::stop();
std::cout << "Peak memory by SDSL: " << sdsl::memory_monitor::peak() / 1024 << " KB" << std::endl;
return 0;
}