# Demo script: pick out family names (tokens ending in "idae") from a piece
# of text, then split trailing punctuation into separate tokens and rebuild
# the sentence from those tokens.
#
# Top-level results left in the workspace:
#   text1      - the sample text
#   text2      - tokens of text1, with trailing punctuation split off
#   lymantrids - positions (in the initial whitespace split) of tokens
#                starting with "Lyman"
#   temp       - the initial tokens with one trailing punctuation mark removed
#   families   - elements of temp ending in "idae"
#   punctuation, newpunct - positions of punctuated / pure-punctuation tokens
#   newtext    - the text rebuilt from text2

# NOTE(review): the sample text spells the family suffix "-idea"; the pattern
# used below is "idae$". The string is left untouched because it is data.
text1 <- "We want to write some R code recognises animal or plant family, subfamily, tribe or subtribe names in a piece of text. For animals these always end in -idea, -inae, -ini and -ina respectively. The Noctuidae, Erebidae, Sphingidae and Saturniidae are families of moths, the Lymantriinae is a subfamily of Erebidae, the Lymantriini, Leucomini, Orgyiini and Nygmiini are tribes of the Lymantriinae, etc."

# Tokenise on single spaces.
text2 <- unlist(strsplit(text1, " "))

# Positions of tokens that start with "Lyman".
lymantrids <- grep("^Lyman", text2)

# Drop one trailing punctuation character so that e.g. "Erebidae," can match
# the "idae$" pattern, then keep the tokens that end in "idae".
temp <- gsub("[[:punct:]]$", "", text2)
families <- temp[grep("idae$", temp)]

# Detach the final punctuation character of each punctuated token by putting
# a space in front of it; re-splitting then turns it into its own token.
# Indexing with an empty `punctuation` is a harmless no-op.
punctuation <- grep("[[:punct:]]$", text2)
text2[punctuation] <- sub("([[:punct:]])$", " \\1", text2[punctuation])
text2 <- unlist(strsplit(text2, " "))

# Rebuild the sentence: every token is preceded by a space except the first
# token and tokens that consist of a single punctuation character.
newpunct <- grep("^[[:punct:]]$", text2)
newtext <- paste0(
  ifelse(seq_along(text2) %in% c(1L, newpunct), "", " "),
  text2,
  collapse = ""
)
newtext