# R code Chapter 21: text manipulation, part 1
#
# Cleans up a parenthesised tree string whose tip labels are each followed by
# ":" and a branch length (presumably a fragment of a Newick tree -- the
# parentheses in this sample are not balanced):
#   1. shorten the tip labels and drop the [...] annotations,
#   2. remove the ":<branch length>" fields (three alternative methods),
#   3. extract the tip names and move the well number to the front.

tr1 <- "(((((BBTH004_16_CCDB_24026_A4_91Tachinidae[90Drosophila]:1.14675336480776657311,(BBTH006_16_CCDB_24026_A6_91Belvosia:0.59297714057027572920,BBTH055_16_CCDB_24026_E7_92Tachinidae[91Belvosia]:0.52736765819178677006):0.32998852276617751667):0.31714205003432383023,((BBTH084_16_CCDB_24026_G12_97Tachinidae:0.00006954085997979414,BBTH085_16_CCDB_24026_H1_97Tachinidae:0.00006954085997979414):0.00006954085997979414,(BBTH080_16_CCDB_24026_G8_97Tachinidae:0.00006954085997979414,BBTH091_16_CCDB_24026_H7_97Tachinidae:0.00006954085997979414)"

# Drop the "16_CCDB_24026_" segment from every label. tr2 is not used by the
# later steps, which start again from tr1.
tr2 <- gsub("16_CCDB_24026_", "", tr1)
# tr2

# Collapse everything from "BBTH" up to the nearest following "24026" into
# "BBTH24026" (".+?" is a lazy match, so it stops at the first "24026").
tr3 <- gsub("BBTH.+?24026", "BBTH24026", tr1)
# tr3

# Remove everything between [ and ], brackets included.
tr4 <- gsub("\\[.+?\\]", "", tr3)
# tr4

# Split into single characters so the delimiter positions can be located.
tr5 <- strsplit(tr4, "")
tr5 <- unlist(tr5)
# head(tr5, 40)
colon <- which(tr5 == ":")
colon
close <- which(tr5 == ")")
comma <- which(tr5 == ",")
# close
# comma

# The first branch-length field: from the first colon up to, but not
# including, the first comma.
tr5[colon[1]:(comma[1] - 1)]

# Method 1 ----
# Repeatedly take the first remaining colon, cut from it up to the next "," or
# ")" (whichever comes first) and delete that text everywhere in the string.
tr4 <- gsub("\\[.+?\\]", "", tr3)
repeat {
  ss <- unlist(strsplit(tr4, ""))
  colon <- which(ss == ":")
  L <- length(colon)
  if (L == 0) break
  close <- which(ss == ")")
  comma <- which(ss == ",")
  # First delimiter after the colon. Only delimiters that actually exist are
  # considered, so a missing comma or parenthesis cannot turn the result into
  # NA; a field with no delimiter after it runs to the end of the string.
  after <- c(comma[comma > colon[1]], close[close > colon[1]])
  stop_at <- if (length(after) > 0) min(after) else nchar(tr4) + 1
  n <- substr(tr4, colon[1], stop_at - 1)
  # fixed = TRUE: n is literal text; as a regular expression its "." would
  # match any character.
  tr4 <- gsub(n, "", tr4, fixed = TRUE)
}
tr4
tr4_method1 <- tr4 # kept so the three methods can be compared

# Method 2 ----
# Every field in this string is a colon followed by exactly 22 characters, so
# each one can be cut out by position. Working from the last field to the first
# keeps the positions of the fields still to be removed valid.
tr4 <- gsub("\\[.+?\\]", "", tr3)
tr5 <- unlist(strsplit(tr4, ""))
colons <- grep(":", tr5)
ends <- colons + 22
for (i in rev(seq_along(colons))) {
  tr4 <- paste0(
    substr(tr4, 1, colons[i] - 1),
    substr(tr4, ends[i] + 1, nchar(tr4))
  )
}
tr4
tr4_method2 <- tr4

# Method 3 ----
# Same fixed-width assumption as Method 2, but drop the characters from the
# split vector in one go and paste the rest back together.
tr4 <- gsub("\\[.+?\\]", "", tr3)
tr5 <- unlist(strsplit(tr4, ""))
colons <- grep(":", tr5)
ends <- colons + 22 # the colon plus the 22 characters after it
positions <- unlist(Map(`:`, colons, ends))
# Guard: with no colons `positions` is NULL, and tr5[-NULL] would select
# nothing instead of keeping everything.
if (length(positions) > 0) {
  tr5 <- tr5[-positions]
}
tr4 <- paste(tr5, collapse = "")
tr4
tr4_method3 <- tr4

# Extracting names and well number ----
# Strip "(", then split on ")" and "," and drop the empty pieces left between
# adjacent delimiters. (`names` also is a base function; calls such as
# names(x) still resolve to the function.)
names <- gsub("\\(", "", tr4)
names <- unlist(strsplit(names, ")", fixed = TRUE))
names <- unlist(strsplit(names, ",", fixed = TRUE))
# names
names <- names[names != ""]
names

# Rearranging names ----
# Each name splits on "_" into e.g. "BBTH24026" "A4" "91Tachinidae"; put the
# second piece (the well number) first.
rearrange <- vapply(
  strsplit(names, "_", fixed = TRUE),
  function(parts) paste(parts[2], parts[1], parts[3], sep = "_"),
  character(1)
)
rearrange
sort(rearrange)