#! /usr/bin/perl -w
use strict;
use warnings;
use Bio::SeqIO;

#=================================
# Script to rename sequences based on user-defined pattern matching
# briandotoakleyatarsdotusdadotgov
# k.purdy at warwick.ac dot uk
#=================================
#
# Reads the FASTA file named on the command line, rewrites each sequence id
# as "<group>_<last 5 characters of the old id>" (the group is chosen by the
# regexes in pattern_match), and writes every record to STDOUT.
# Records whose id matches no pattern are written with their id unchanged.

#---define input file----
my $usage = "\nUsage: $0 filename\n\n";
my $input_file = $ARGV[0];

# Test for definedness/length rather than truth so a file named "0" is accepted.
die $usage unless defined $input_file && length $input_file;
chomp $input_file;

#---define output stream----
# The format is stated explicitly so the output does not depend on
# Bio::SeqIO's format guessing; the input is declared as FASTA below.
my $out    = Bio::SeqIO->new(-format => 'fasta', -fh   => \*STDOUT);
my $seq_in = Bio::SeqIO->new(-format => 'fasta', -file => $input_file);

my $unmatched = 0;    # number of records whose id matched no group pattern

while (my $seq_obj = $seq_in->next_seq()) {
    my $new_id = pattern_match($seq_obj->id());

    if (defined $new_id) {
        $seq_obj->id($new_id);    # resets id for seq object
    }
    else {
        $unmatched++;             # leave the original id in place
    }

    $out->write_seq($seq_obj);
}

# One summary line on STDERR instead of a warning per record.
warn "$unmatched sequence id(s) matched no group pattern and were left unchanged\n"
    if $unmatched;

# pattern_match($id)
#
# Builds the replacement id for one sequence.
#
#   $id     - the current sequence id
#   returns - "<group>_<last 5 characters of $id>" when $id matches one of
#             the group patterns; undef (scalar context) when $id is
#             undefined or matches none of them
#
# The group is a lexical that starts undefined on every call, so an id that
# matches nothing can never inherit the group of a previously seen record.
sub pattern_match {
    my ($id) = @_;
    return unless defined $id;

    # EXAMPLES ARE LEFT UNCOMMENTED BELOW. EDIT TO SUIT.
    # Define some group based on a regex; if several patterns match,
    # the last matching one wins.
    my $group;
    if ($id =~ m/^HJSJARD01/) { $group = 'Diseased' }
    if ($id =~ m/^HJJBOJ401/) { $group = 'Healthy' }

    return unless defined $group;

    my $seqid = substr $id, -5;      # takes last 5 characters from each id
    return $group . "_" . $seqid;    # new id based on group and seqid
}