We use an in-house bidirectional k-mer-to-integer mapping algorithm to retrieve and store k-mer enrichment scores for specific proteins in our contig_8mers tables. Here are two separate implementations, one in Perl and one in PHP, each going both ways. They do exactly the same thing, and can easily be translated into any language you choose.

Perl:

use strict;
use warnings;

# Converts an integer code back into the k-mer it stands for.
#
# The integer is read as a base-4 number whose digits 0,1,2,3 mean A,C,G,T,
# with the first base of the k-mer as the most significant digit.
#
# Arguments:
#   $int        - non-negative integer code (as returned by kmerToInt)
#   $kmerLength - length of the k-mer to build (required)
# Returns:
#   the k-mer as an upper-case string, left-padded with 'A' to $kmerLength.
#   If $int needs more than $kmerLength digits the result is longer than
#   $kmerLength; nothing is truncated.
# Dies:
#   if $kmerLength is not supplied, or if $int is not a non-negative integer.
sub intToKmer {
    my ($int, $kmerLength) = @_;

    unless (defined $kmerLength) {
        die "kmer length needs to be supplied to intToKmer function";
    }

    # Reject undef, negative and fractional input up front: a negative value
    # would silently come back as all A's, and a fractional one would keep
    # the loop below dividing until the value underflows to zero.
    unless (defined $int && $int =~ /\A[0-9]+\z/) {
        my $shown = defined $int ? $int : 'undef';
        die "Invalid input to intToKmer function: $shown\n";
    }

    my @nucleotides = qw(A C G T);    # index = base value
    my $kmer        = '';

    # Peel off base-4 digits, least significant (= last base) first.
    while ($int > 0) {
        my $ntVal = $int % 4;
        $kmer = $nucleotides[$ntVal] . $kmer;
        $int  = ($int - $ntVal) / 4;
    }

    # To correct for cases in which A's are at the front of the string
    # (they equal 0, so they are left out by the loop above).
    while (length($kmer) < $kmerLength) {
        $kmer = 'A' . $kmer;
    }

    return $kmer;
}

# Converts a k-mer to its corresponding integer.
#
# The k-mer is read as a base-4 number with A,C,G,T = 0,1,2,3 and the first
# base as the most significant digit, e.g. "CGT" = 1*16 + 2*4 + 3 = 27.
# The result is unique among k-mers of the same length only ("A" and "AA"
# both map to 0), which is why intToKmer needs the length to go back.
# The code grows as 4**length, so very long k-mers will exceed the native
# integer range.
#
# Arguments:
#   $kmer - string of A/C/G/T, upper or lower case
# Returns:
#   the integer code (0 for an empty string).
# Dies:
#   on 'N' or 'X', and on any other character that is not A, C, G or T.
sub kmerToInt {
    my ($kmer) = @_;

    my %valueOf = (A => 0, C => 1, G => 2, T => 3);
    my $int     = 0;

    for my $base (split //, $kmer) {
        my $upper = uc $base;
        my $ntVal = $valueOf{$upper};

        unless (defined $ntVal) {
            if ($upper eq 'N' || $upper eq 'X') {
                die("Please do not use 'N' or 'X' in the kmer string.");
            }
            die("Invalid nucleotide character $base entered in kmerToInt");
        }

        # Shift what we have one base-4 digit to the left and add this base.
        $int = $ntVal + 4 * $int;
    }

    return $int;
}

PHP:

<?php

/* Converts an integer code back into the k-mer it stands for.
   The integer is read as a base-4 number whose digits 0,1,2,3 mean A,C,G,T,
   with the first base of the k-mer as the most significant digit. The result
   is left-padded with 'A' up to $kmerLength.

   NOTE(review): the opening lines of this function (everything before the
   loop condition) were missing from the text this listing was restored from.
   The signature, the input check and the initialisation of $kmer were
   reconstructed to mirror the Perl version above -- confirm against the
   original source. */
function intToKmer($int, $kmerLength) {
    // Reject negative and fractional input up front: a negative value would
    // silently come back as all A's, and a fractional one would keep the
    // loop below dividing until the value underflows to zero.
    if (!preg_match('/\A[0-9]+\z/', (string) $int)) {
        die("Invalid input ($int) to intToKmer");
    }

    $kmer = '';

    // Peel off base-4 digits, least significant (= last base) first.
    while ($int > 0) {
        $ntVal = $int % 4;
        switch ($ntVal) {
            case 0:
                $nt = 'A';
                break;
            case 1:
                $nt = 'C';
                break;
            case 2:
                $nt = 'G';
                break;
            case 3:
                $nt = 'T';
                break;
            default:
                die("Invalid nucleotide number ($ntVal) in intToKmer");
        }
        $kmer = $nt . $kmer;
        $int = ($int - $ntVal) / 4;
    }

    # To correct for cases in which A's are at the front of the string
    # (they equal 0, so they are left out by the loop above).
    while (strlen($kmer) < $kmerLength) {
        $kmer = 'A' . $kmer;
    }

    return $kmer;
}

/* Converts a k-mer to its corresponding integer.
   The k-mer is read as a base-4 number with A,C,G,T = 0,1,2,3 and the first
   base as the most significant digit, e.g. "CGT" = 1*16 + 2*4 + 3 = 27.
   The result is unique among k-mers of the same length only ("A" and "AA"
   both map to 0). The code grows as 4**length, so very long k-mers will
   exceed the native integer range.
   Dies on 'N' or 'X', and on any other character that is not A, C, G or T. */
function kmerToInt($kmer) {
    $int = $ntVal = 0;
    $length = strlen($kmer);

    for ($i = 0; $i < $length; $i++) {
        // Square-bracket offsets: the curly-brace form $kmer{$i} no longer
        // exists in PHP 8.
        switch ($kmer[$i]) {
            case 'A':
            case 'a':
                $ntVal = 0;
                break;
            case 'C':
            case 'c':
                $ntVal = 1;
                break;
            case 'G':
            case 'g':
                $ntVal = 2;
                break;
            case 'T':
            case 't':
                $ntVal = 3;
                break;
            case 'N':
            case 'n':
            case 'X':
            case 'x':
                die("Please do not use 'N' or 'X' in the kmer string.");
            default:
                // {$kmer[$i]} prints the offending character itself.
                die("Invalid nucleotide character ({$kmer[$i]}) entered in kmerToInt");
        }

        // Shift what we have one base-4 digit to the left and add this base.
        $int = $ntVal + 4 * $int;
    }

    return $int;
}

?>