SweepMerge

複数のファイルから気になる部分を抜き出し複数列の散布図にプロットできる CSV のような形式に変換して表示するプログラム。perl でとも思ったけど、調べることが多そうだったので scala にしといた。

/**
 * From multiple verbose GC log files produced with +UseConcMarkSweepGC and
 * +PrintGCDetails, pick up the sweep time entries and convert them to CSV
 * which can be plotted as a scatter graph.  The output contains a timestamp
 * column and sweep time column(s).  The timestamp column is named 'time'.
 * Sweep time columns are named with the corresponding filename.
 */
import scala.io._
import java.io.File

/** A single sweep-time record: the timestamp (in seconds) at which it was
    logged and the sweep duration.  Both values are kept as the raw text
    read from the log, because nothing here does arithmetic on them and
    so no numeric conversion is needed. */
case class STEnt(
  ts: String,
  dur: String
)

/** Everything extracted from one input file: the file's path, the STEnt
    records found in it, and the slot number that selects which output
    column the file's values are written to. */
case class SweepSum(
  path: String,
  ents: List[STEnt],
  slot: Int
)

/** Main object which takes file name(s) as arguments and converts them
    to a CSV-like format.

    The output has a `time` column followed by one column per input file;
    every row carries a value only in the column of the file it came from. */
object SweepMerge {
  /** Parses each readable argument and prints the header and the rows to
   *  standard output.
   *
   *  @param args paths of the verbose GC log files to merge
   */
  def main(args: Array[String]): Unit = {
    // Filter out non-existing paths.  Directories are dropped as well: they
    // exist, but opening one as a text source throws.  Anything else that
    // exists (including pipes) is still accepted.
    val validPaths = args.toList.filter { p =>
      val f = new File(p)
      f.exists && !f.isDirectory
    }
    // Process the log files; the index becomes the file's output column slot.
    val lss: List[SweepSum] = validPaths.zipWithIndex.map { case (p, i) =>
      SweepSum(p, entsInFile(p), i)
    }
    // print header
    println(("time" :: validPaths).mkString(","))
    // print rows: the timestamp, then empty cells up to this file's slot,
    // the duration, and empty cells for the remaining files.
    val columns = validPaths.size
    for (ss <- lss; e <- ss.ents) {
      println(e.ts + "," + "," * ss.slot + e.dur + "," * (columns - ss.slot - 1))
    }
  }

  /** Looks for the sweep end pattern in a file and converts every matching
   *  line to an STEnt.
   *
   *  A line matches only if the whole line fits the pattern, i.e. it starts
   *  with the `seconds.fraction:` timestamp.  The first of the two
   *  slash-separated numbers is kept as the duration; the second is ignored.
   *
   *  @param path path of the file to read
   *  @return the entries in the order they appear in the file
   */
  def entsInFile(path: String): List[STEnt] = {
    // NOTE(review): this progress line goes to standard output, so it ends up
    // in front of the CSV header -- confirm whether it should go to stderr.
    println("processing " + path)
    val ptn = """(\d+\.\d+): \[CMS-concurrent-sweep: (\d+\.*\d*)/(\d+\.*\d*).*""".r
    val src = Source.fromFile(path)
    try {
      // getLines() is lazy, so the list must be built before the source is closed.
      src.getLines().collect {
        case ptn(ts, dur, _) => STEnt(ts, dur)
      }.toList
    } finally {
      src.close()
    }
  }
}