MySQL General Log Parser

Developed In: Perl — Contributed by: Gavin Towey

The mysql general log outputs all statements and commands sent to the server in a plain text format. This is useful for many things such as gathering all queries that the server is running, including selects that wouldn't otherwise show up in the binary logs. However this format isn't always easy to parse and extract whole statements from because queries with newline characters mean that you have to read until the beginning of the next entry to find out where your statement actually ends. This perl script will take care of finding these boundaries and printing statements.

The output can then be used more easily to feed into mysqlslap and/or maatkit's mk-query-profiler for instance.


Gavin Towey
Perl
  1. #!/usr/bin/perl
  2. use strict;
  3. use Data::Dumper;
  4. use Getopt::Long;
  5.  
  6. # author: Gavin Towey, 2008 gtowey@gmail.com
  7. # todo, add "follow thread" capability
  8. # so we can give a process name or thread id & see
  9. # all activity in sequence for each thread
  10.  
  11. my %OPTIONS;
  12.  
  13. if (
  14. !GetOptions( \%OPTIONS,
  15. "help",
  16. "type|t=s",
  17. "pattern|p=s",
  18. "preserve-newlines|n",
  19. "separator|s=s" )
  20.  
  21. )
  22. {
  23. $OPTIONS{'help'}++;
  24. }
  25.  
  26. if (!defined($OPTIONS{'type'})) {
  27. $OPTIONS{'type'} = 'query';
  28. } else {
  29. $OPTIONS{'type'} = lc ($OPTIONS{'type'});
  30. }
  31.  
  32.  
  33. my $file = $ARGV[0];
  34.  
  35. if ( !$file ) {
  36. print "missing log file name\n";
  37. $OPTIONS{'help'}++;
  38. }
  39.  
  40. if ( $OPTIONS{'help'} ) {
  41. usage();
  42. }
  43.  
  44. main();
  45.  
  46. my @LINEBUFFER;
  47.  
  48. sub get_next_query {
  49. my ($FH) = shift;
  50.  
  51. my ( $query_found, $error, $in_block ) = ( 0, 0, 0 );
  52. if ($#LINEBUFFER ==0 ) { $in_block = 1; }
  53.  
  54. while ( !$query_found && !$error ) {
  55.  
  56. $LINEBUFFER[ $#LINEBUFFER + 1 ] = <$FH>;
  57. if ( !$LINEBUFFER[$#LINEBUFFER] ) {
  58. return -1;
  59. }
  60.  
  61. if ( !$in_block
  62. && $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6} [\d:]{8})?\s+(\d+)\s(\w+)(\s(.*))?/ )
  63. { # we have the beginning of a line
  64.  
  65. if ( $#LINEBUFFER == 0 ) { # begin block capture
  66. $in_block = 1;
  67. }
  68.  
  69. }
  70. elsif ($in_block) {
  71.  
  72.  
  73. if ( $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6} [\d:]{8})?\s+(\d+)\s(\w+)(\s(.*))?/ ) {
  74. if ( $#LINEBUFFER > 0 ) { #end block
  75. # return everything up to this statement
  76. $query_found = '';
  77. for ( my $i = 0 ; $i < $#LINEBUFFER ; $i++ ) {
  78. $query_found .= $LINEBUFFER[$i];
  79. }
  80. $LINEBUFFER[0] = $LINEBUFFER[$#LINEBUFFER];
  81. $#LINEBUFFER = 0;
  82.  
  83. }
  84. } else {
  85. }
  86. }
  87. else {
  88. shift @LINEBUFFER;
  89. }
  90.  
  91. }
  92. return $query_found;
  93. }
  94.  
  95. sub main {
  96. open( FILE, $file );
  97. my $done = 0;
  98. while ( !$done ) {
  99. my $query = get_next_query( \*FILE );
  100. if ( $query eq -1 ) {
  101. $done = 1;
  102. }
  103. else {
  104. chomp($query);
  105. $query =~ /^(\d{6} [\d:]{8})?\s+(\d+)\s(\w+)(\s+(.*))?/s;
  106. my ($type, $query ) = (lc($3), $5);
  107. if (!$OPTIONS{'preserve-newlines'}) { $query =~ s/[\r\n]/ /g; }
  108. if ( $type eq $OPTIONS{'type'}) {
  109. if (defined($OPTIONS{'pattern'})) {
  110. if ( $query =~ /$OPTIONS{'pattern'}/ ) {
  111. print $query . $OPTIONS{'separator'} . "\n";
  112.  
  113. }
  114. } else {
  115. print $query . $OPTIONS{'separator'}. "\n";
  116. }
  117. }
  118. }
  119. }
  120. close FILE;
  121. }
  122.  
  123. sub usage {
  124. print <<EOF;
  125. NAME
  126. $0 - dump statement from mysql general log format
  127.  
  128. USAGE
  129. $0 <options> [log file]
  130.  
  131. SYNOPISIS
  132. For the most part, the general log is pretty straighforward,
  133. except when SQL statements contain newline characters.
  134. This script takes care of finding those boundaries and
  135. extracting whole statements.
  136.  
  137. Most often some filter is passed to the program in order
  138. to return only certain types of statements.
  139.  
  140.  
  141. OPTIONS
  142.  
  143. --help
  144. Display this screen
  145.  
  146. --type=s
  147. -t
  148. One of Query or Connect, default is Query
  149.  
  150. --pattern=s
  151. -p
  152. Regular expression to match statements against.
  153. Usually something like ^SELECT
  154.  
  155. --preserve-newlines
  156. -n
  157. Keep original newlines in multiline queries default
  158. is to make all queries single line.
  159.  
  160. -separator=s
  161. -s
  162. Add the separator after every query
  163.  
  164. EOF
  165. }

Current Tags

You must be logged in to tag this tool

Let's see if a <pre> block helps formatting any

--- mysql_log_parser.pl.orig   2008-09-11 11:19:02.000000000 -0400
+++ mysql_log_parser.pl   2008-09-11 11:21:00.000000000 -0400
@@ -4,6 +4,7 @@
 use Getopt::Long;
 
 # author: Gavin Towey, 2008 gtowey@gmail.com
+# contributions from: Moshe Hyzon, 2008 moshe.hyzon@grantstreet.com
 # todo, add "follow thread" capability
 # so we can give a process name or thread id & see
 # all activity in sequence for each thread
@@ -16,7 +17,9 @@
             "type|t=s",
             "pattern|p=s",
             "preserve-newlines|n",
-            "separator|s=s" )
+            "separator|s=s",
+            "time|T",
+            "connection-id|c" )
 
   )
 {
@@ -61,7 +64,7 @@
       }
 
       if ( !$in_block
-         && $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6} [\d:]{8})?\s+(\d+)\s(\w+)(\s(.*))?/ )
+         && $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6}\s+[\d:]{7,8})?\s+(\d+)\s(\w+)(\s(.*))?/ )
       {    # we have the beginning of a line
 
          if ( $#LINEBUFFER == 0  ) {    # begin block capture
@@ -72,7 +75,7 @@
       elsif ($in_block) {
 
          
-         if ( $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6} [\d:]{8})?\s+(\d+)\s(\w+)(\s(.*))?/ ) {
+         if ( $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6}\s+[\d:]{7,8})?\s+(\d+)\s(\w+)(\s(.*))?/ ) {
             if ( $#LINEBUFFER > 0 ) {    #end block
                    # return everything up to this statement
                $query_found = '';
@@ -97,6 +100,7 @@
 sub main {
    open( FILE, $file );
    my $done = 0;
+   my $datetime = '';
    while ( !$done ) {
       my $query = get_next_query( \*FILE );    
       if ( $query eq -1 ) {
@@ -104,9 +108,12 @@
       }
       else {
          chomp($query);
-         $query =~ /^(\d{6} [\d:]{8})?\s+(\d+)\s(\w+)(\s+(.*))?/s;
-         my ($type, $query ) = (lc($3), $5);
+         $query =~ /^(\d{6}\s+[\d:]{7,8})?\s+(\d+)\s(\w+)(\s+(.*))?/s;
+         my ($type, $id, $query ) = (lc($3), $2, $5);
+         if ($1 != '') { $datetime = $1 ; }
          if (!$OPTIONS{'preserve-newlines'}) { $query =~ s/[\r\n]/ /g; }
+         if ($OPTIONS{'connection-id'}) { $query = "$id\t$query"; }
+         if ($OPTIONS{'time'}) { $query = "$datetime\t$query"; }
          if ( $type eq $OPTIONS{'type'}) {
             if (defined($OPTIONS{'pattern'})) {
                if ( $query =~ /$OPTIONS{'pattern'}/ ) {
@@ -159,10 +166,18 @@
       Keep original newlines in multiline queries default
       is to make all queries single line.
       
-   -separator=s
+   --separator=s
     -s
        Add the separator after every query
 
+   --time
+    -T
+      Print the timestamp of each query
+
+   --connection-id
+    -c
+      Print the connection ID of the query
+
 EOF
 exit;
 }

Actually my problem is, to filter all threads from ONE db-user@host out of the generel log (a big-big generel log !! :-)

"Connect xxx@host on" is helpfull, but then i am running in the problem with the query newlines an the "date time" fields.

The following patch contains a bug fix to the regexes, which did not successfully match lines from the general log that begin with the date/time fields. Also added is the ability to output both date/time of query and connection ID of query.

[code] --- mysql_log_parser.pl.orig 2008-09-11 11:19:02.000000000 -0400 +++ mysql_log_parser.pl 2008-09-11 11:21:00.000000000 -0400 @@ -4,6 +4,7 @@ use Getopt::Long; # author: Gavin Towey, 2008 gtowey@gmail.com +# contributions from: Moshe Hyzon, 2008 moshe.hyzon@grantstreet.com # todo, add "follow thread" capability # so we can give a process name or thread id & see # all activity in sequence for each thread @@ -16,7 +17,9 @@ "type|t=s", "pattern|p=s", "preserve-newlines|n", - "separator|s=s" ) + "separator|s=s", + "time|T", + "connection-id|c" ) ) { @@ -61,7 +64,7 @@ } if ( !$in_block - && $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6} [\d:]{8})?\s+(\d+)\s(\w+)(\s(.*))?/ ) + && $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6}\s+[\d:]{7,8})?\s+(\d+)\s(\w+)(\s(.*))?/ ) { # we have the beginning of a line if ( $#LINEBUFFER == 0 ) { # begin block capture @@ -72,7 +75,7 @@ elsif ($in_block) { - if ( $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6} [\d:]{8})?\s+(\d+)\s(\w+)(\s(.*))?/ ) { + if ( $LINEBUFFER[$#LINEBUFFER] =~ /^(\d{6}\s+[\d:]{7,8})?\s+(\d+)\s(\w+)(\s(.*))?/ ) { if ( $#LINEBUFFER > 0 ) { #end block # return everything up to this statement $query_found = ''; @@ -97,6 +100,7 @@ sub main { open( FILE, $file ); my $done = 0; + my $datetime = ''; while ( !$done ) { my $query = get_next_query( \*FILE ); if ( $query eq -1 ) { @@ -104,9 +108,12 @@ } else { chomp($query); - $query =~ /^(\d{6} [\d:]{8})?\s+(\d+)\s(\w+)(\s+(.*))?/s; - my ($type, $query ) = (lc($3), $5); + $query =~ /^(\d{6}\s+[\d:]{7,8})?\s+(\d+)\s(\w+)(\s+(.*))?/s; + my ($type, $id, $query ) = (lc($3), $2, $5); + if ($1 != '') { $datetime = $1 ; } if (!$OPTIONS{'preserve-newlines'}) { $query =~ s/[\r\n]/ /g; } + if ($OPTIONS{'connection-id'}) { $query = "$id\t$query"; } + if ($OPTIONS{'time'}) { $query = "$datetime\t$query"; } if ( $type eq $OPTIONS{'type'}) { if (defined($OPTIONS{'pattern'})) { if ( $query =~ /$OPTIONS{'pattern'}/ ) { @@ -159,10 +166,18 @@ Keep original newlines in multiline queries default is to make all queries single line. - -separator=s + --separator=s -s Add the separator after every query + --time + -T + Print the timestamp of each query + + --connection-id + -c + Print the connection ID of the query + EOF exit; } [/code]

Moshe Hyzon

Votes

  • Rated 5.00 out of 5
Rated 5.00 out of 5 with 2 votes cast.
You must be logged in to vote.

Watches

4 members are watching this tool
You must be logged in to track this tool.

Provide Feedback

Please note:
HTML will be purified, but we allow for a number of HTML tags so that you have the flexibility to decorate your comment text to some extent. The comments allow the following HTML tags:

strong, b, em, blockquote, a, code, pre

To put code into your comment, simply encapsulate your code with
[code language="XXX"][/code], where XXX is any common language, for instance "PHP", "SQL", "C", etc.



You must be logged in to comment