# Input records, one per element: a label, a single space, then a sequence
# string. Most labels carry an identifier of the form "MRS<digits>" somewhere
# in the middle; some have none, and one already starts with it.
# Stored as a plain character vector so no text connection is left open.
data <- c(
  "Aleiodes_bicolor_MRS1008 TATTTTATATTTTTTATTT",
  "Aleiodes_praetor_UK_MRS67_ XXXXXXXXXGTTTTATAT",
  "Aleiodes_rugulosus_CollHH1599_Norway xxxxxxxxxATTTTGTATTTTTT",
  "Aleiodes_seriatus_MRS252_France xxxxxxxxxxxxxxxxxxxxxxxxx",
  "Aleiodes_seriatus_MRS254_France xxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
  "Aleiodes_seriatus_MRS263_France xxxxxxxxxATTTTATACTTTTTATTTGG",
  "Aleiodes_seriatus_MRS264_France xxxxxxxxxATTTTATACTTTTTATTTGG",
  "Aleiodes_seriatus_MRS136_France GATATTGGAATTTTATATTT",
  "MRS239_Aleiodes_seriatus_Russia xxxxxxxxxGTTTTATACTTCTTATTT",
  "Aleiodes_seriatus_MRS222_Germany xxxxxxxxxATTTtaTaCTTTTTATT",
  "Aleiodes_sibiricus_MRS313_Sweden xxxxxxxxxxTTTTGTATTTTTTATT",
  "Aleiodes_signatus_MRS378_Sweden xxxxxxxxxxxTTTATATTTTTTATT",
  "Aleiodes_signatus_MRS712_Sweden GATATTGGTATTTTATATTTTTTA",
  "Aleiodes_unipunctator_CollHH1603_Norway xxxxxxxATTTTATATTTTTTATG"
)

#' Move the first "MRS<digits>" identifier of each string to the front
#'
#' For every element of `b` that contains an identifier matching
#' `MRS[0-9]+` somewhere after its first character, the first such identifier
#' is cut out (together with the underscore directly in front of it, if any)
#' and re-attached at the start, followed by "_".
#'
#' Elements are returned unchanged when they contain no identifier, when
#' they already start with one, or when they are `NA`.
#'
#' @param b A character vector (a single string works as before).
#' @return A character vector the same length as `b`.
startwithMRS <- function(b) {
  id_pattern <- "MRS[0-9]+"

  # Position of the first identifier in each element; -1 means "no match".
  # as.integer() drops the match attributes so the comparison below yields a
  # plain logical mask.
  first_pos <- as.integer(regexpr(id_pattern, b))

  # Only elements whose identifier is not already at position 1 are rewritten.
  needs_move <- !is.na(first_pos) & first_pos > 1L
  if (!any(needs_move)) {
    return(b)
  }

  movable <- b[needs_move]
  ids <- regmatches(movable, regexpr(id_pattern, movable))

  # sub() removes only the first occurrence, i.e. the identifier extracted
  # above. Taking the preceding "_" with it avoids leaving "__" or a dangling
  # "_" behind in the label.
  remainder <- sub(paste0("_?", id_pattern), "", movable)

  out <- b
  out[needs_move] <- paste0(ids, "_", remainder)
  out
}

MRSatstart <- startwithMRS(data)

# Plain lexicographic sort of the relabelled records (so "MRS1008..." sorts
# before "MRS136...").
sort(MRSatstart)