Skip to content

motif search

1 message · Jean lobry

#
Dear Alessia,
You may try this:

#
# Load the seqinr package:
#
   library(seqinr)
#
# A FASTA file example - that ships with seqinr - which contains
# the complete genome sequence of Chlamydia trachomatis :
#
   fastafile <- system.file("sequences/ct.fasta", package = "seqinr")
#
# Import the sequence as a string of characters. read.fasta() returns
# a list with one element per FASTA entry, so extract the first (and
# here only) entry to work on the string itself rather than on a list:
#
   myseq <- read.fasta(fastafile, as.string = TRUE)[[1]]
   nchar(myseq) # 1042519, that is a Mb sequence
#
# Look for motif "atatatat", with possible overlap.
#
# words.pos() is based on gregexpr(), which only reports
# non-overlapping matches for a plain pattern. Wrapping the motif in a
# zero-width lookahead "(?=...)" (Perl-style regex, hence perl = TRUE)
# makes every match consume no character, so each starting position is
# tested, overlapping occurrences included. The old `extended`
# argument is dropped because it is not accepted by current versions
# of gregexpr():
#
   words.pos("(?=atatatat)", myseq, perl = TRUE)
#
# This returns the positions where the motif is found, that
# is : 236501 236503 283987 687083 792792 792794
#
# Check the first hit by extracting 9 characters from it, that is
# the 8-letter motif plus the following base:
#
   substr(myseq, 236501, 236501 + 8)
#
# Should be
# [1] "atatatata"
#

HTH,

Jean