#!/usr/bin/ruby -Ku
#
# DiaaiD: builds train-diagram (WinDIA) data from Toretabi timetables
# by mobitan 2011/08/15
#
require 'rubygems'
require 'mechanize'
require 'logger'
require 'kconv'

if ARGV.size != 3
  puts "usage: #{$0} <parameters> <precision> <outputfile>"
  puts "example: ruby #{$0} 'TT=1&VT=T&S1=28155&S2=28283&RL=&PG=1&FX=1&EX=28155&OT=7&DI=20&TC=SLERBN&YY=&M=&D=' 0 result.dia"
  exit 1
end
parameters = ARGV[0] # parameters handed to the Toretabi CGI (URI query string)
precision = ARGV[1]  # 0: simple mode  1: detailed mode (fetch arrival times from each train's page)
outputfile = ARGV[2] # output file path

# Timetable of every train on one line.
class Dia
  attr_accessor :timestamp, :source, :title, :line, :section, :direction,
                :stations, :station_names, :trains, :lkinds, :skinds

  def initialize
    @stations = []
    @station_names = []
    @trains = []
    # @lkinds (long names) and @skinds (short names) are parallel arrays:
    # the same index refers to the same train kind in both.
    @lkinds = ["普通", "快速", "特別快速", "新快速", "通勤快速", "準急", "急行", "快速急行", "特急", "快速特急", "通勤準急", "通勤急行", "区間快速", "区間急行", "回送", "貨物", "急行貨物"]
    @skinds = ["", "快速", "特快", "新快", "通快", "準急", "急行", "快急", "特急", "快特", "通準", "通急", "区快", "区急", "回送", "貨物", "急貨"]
  end

  # Normalizes a train kind and returns its index.
  #
  # Lookup order: exact match in the long names, then the first long name
  # that starts with +str+, then the first short name that starts with
  # +str+. When nothing matches, the kind is registered as a new entry.
  def guess_kind_index(str)
    @lkinds.index(str) ||
      @lkinds.index { |item| item.start_with?(str) } ||
      @skinds.index { |item| item.start_with?(str) } ||
      register_kind(str)
  end

  # Normalizes a train kind (registering it if unknown) and returns its
  # long name.
  def guess_kind(str)
    @lkinds[guess_kind_index(str)]
  end

  private

  # Appends an unknown kind to both name tables and returns its index.
  # The short name is the first two characters; a one-character kind keeps
  # that single character instead of storing nil, which would break the
  # prefix search in guess_kind_index on the next lookup.
  def register_kind(str)
    @lkinds.push(str)
    @skinds.push(str[/\A..?/] || str)
    @lkinds.length - 1
  end
end

# Timetable of one train across all stations.
class Train
  attr_accessor :ksid, :shubetsu, :aisho, :gousu, :retsuban, :kukan, :bikou,
                :stations, :waypoints

  def initialize
    @waypoints = {}
  end
end

# Times of one train at one station.
class Waypoint
  attr_accessor :station, :station_name, :arrtime, :deptime, :trackno
end

# Scraper for the Toretabi timetable site.
class ToretabiScraper
  SERVER = "http://jikoku.toretabi.jp".freeze

  attr_accessor :dia

  # NOTE(review): WWW::Mechanize and the single-hash post_connect hook
  # signature look like the API of an older Mechanize release -- confirm
  # against the installed gem version before upgrading.
  def initialize
    @agent = WWW::Mechanize.new{ |a|
      a.user_agent_alias = 'Windows IE 7'
      # EUC-JP workaround, see http://d.hatena.ne.jp/otn/20091124
      a.post_connect_hooks << Proc.new do |params|
        if %r!text/html! =~ params[:response]["Content-Type"]
          # Unconditionally convert EUC-JP to UTF-8, and also rewrite the
          # charset string in the head/meta Content-Type with sub.
          params[:response_body] = NKF.nkf("-wm0E",params[:response_body]).sub(/euc-jp/,"utf-8")
          params[:response]["Content-Type"]="text/html; charset=utf-8"
        end
      end
      a.log = Logger.new("diaaid_toretabi.log", 5)
      a.log.level = Logger::INFO
    }
    @dia = Dia.new
  end

  # Loads the train list page and fills @dia; returns the Dia.
  #
  # params (CGI query string):
  #   TT = 1: meaningless but required
  #   S1 = departure station name (search) or station code (confirmed)
  #   S2 = arrival station name (search) or station code (confirmed)
  #   RL = line name (search) or line code (confirmed)
  #   TR = train name
  #   PG = route candidate number
  #   TC = S: Shinkansen  L: limited express  E: express  R: rapid
  #        B: sleeper  N: local
  #   YY = year
  #   M  = month
  #   D  = day
  #   VT = T: train timetable  D: station timetable
  #   EX = reference station code
  #   OT = reference time
  #   DI = number of trains shown per page
  #   PO = page offset
  # precision:
  #   0: simple (train summary and departure times)
  #   1: detailed (train details and arrival/departure times)
  #
  # Raises RuntimeError when params lacks TT=<digits> or when the page
  # does not contain the expected timetable rows.
  def load(params, precision)
    raise "Bad parameter\n#{params}" unless params =~ /TT=\d+/
    uri = "#{SERVER}/cgi-bin/tra.cgi/tra-tt?#{params}"
    @agent.get(uri)
    @dia.source = uri
    @dia.timestamp = Time.now

    # Line name (the last matching <dt> wins).
    @agent.page.search("//div[@id='text_area']/dl/dt").each do |elem|
      @dia.title = elem.inner_text.gsub(/\s+/, '')
    end

    # Section (first <dd> that matches).
    @agent.page.search("//div[@id='text_area']/dl/dd").each do |elem|
      if elem.inner_text =~ /※(.*?)間の列車/
        @dia.section = $1.gsub(/\s+/, '')
        break
      end
    end

    # Station names. The table abbreviates them, so read the list box.
    # Each station key is "name#n" where n counts the occurrences of that
    # name so far, which keeps repeated names distinct.
    @agent.page.search("//select[@name='EX']/option").each_with_index do |elem, i|
      name = elem.inner_text
      @dia.station_names[i] = name
      @dia.stations[i] = "#{name}##{@dia.station_names.count(name)}"
    end

    # Train data: one table column per train.
    rows = @agent.page.search("//table[@class='table_border']//tr")
    column_row = rows[1]
    raise "Timetable not found\n#{uri}" unless column_row
    # drop(1) skips the first cell of the row; unlike [1..-1] it never
    # yields nil when the row is empty.
    column_row.search("./td").drop(1).each_with_index do |_cell, x|
      # XPath positions are 1-based and the first cell was skipped: +2.
      cells = rows.search("./td[#{x + 2}]")
      train = Train.new

      # Summary
      train.ksid = cells[1].at("./a")["href"][/TR=(\d+)/, 1]
      train.retsuban = cells[0].inner_text.sub(/\.+$/, '').strip
      shubetsu = cells[1].inner_text.sub(/\.+$/, '').strip
      train.aisho = cells[2].inner_text.sub(/\.+$/, '').strip
      train.bikou = cells[3].inner_text.sub(/\.+$/, '').strip
      shihatsu = cells[4].inner_text.sub(/\.+$/, '').strip
      shuchaku = cells[5].inner_text.sub(/\.+$/, '').strip
      train.shubetsu = @dia.guess_kind(shubetsu)
      train.kukan = "#{shihatsu}~#{shuchaku}"

      # Stations on the way (departure time, or "↓" for passing)
      cells.drop(6).each_with_index do |cell, y|
        text = cell.inner_text
        next unless text =~ /^\d+$/ || text == "↓"
        waypoint = Waypoint.new
        waypoint.station = @dia.stations[y]
        waypoint.station_name = @dia.station_names[y]
        waypoint.deptime = (text == "↓") ? "-" : text
        # Indexed twice: by station key for the formatters, and by
        # "name@time" so load_detail can find it again.
        train.waypoints[waypoint.station] = waypoint
        train.waypoints["#{waypoint.station_name}@#{waypoint.deptime}"] = waypoint
      end

      # Details
      if precision.to_i > 0
        sleep(1)
        load_detail(train)
      end
      @dia.trains[x] = train
    end
    dia
  end

  # Loads one train's detail page and updates +train+ in place: nickname,
  # number, remarks, and the arrival/departure/track of each waypoint that
  # load already registered.
  def load_detail(train)
    @agent.get("#{SERVER}/cgi-bin/trinf.cgi/route/trinf?TR=#{train.ksid}")

    # Train name
    @agent.page.search("//div[@id='text_area']/dl/dt").each do |elem|
      names = elem.inner_text.split(/\s+/)
      train.aisho = names[1] || ""
      train.gousu = names[2] || ""
      train.gousu = train.gousu[/\d+/]
    end

    # Remarks
    bik = ""
    @agent.page.search("//div[@id='text_area']/dl/dd").each do |elem|
      bik << elem.inner_text << " "
    end
    bik = bik.gsub(/【.*?】/, '')
    bik = bik.gsub(/列車番号\[(.*?)\]/, '')
    bik = bik.gsub(/列車予約コード\[(.*?)\]/, '')
    train.bikou = bik

    # Times. drop(1) skips the first row and is safe on an empty result.
    @agent.page.search("//table[@class='trinf_table_border']//tr").drop(1).each do |row|
      cells = row.search("./td")
      station_name = cells[0].inner_text
      arrtime = cells[1].inner_text
      deptime = cells[2].inner_text
      trackno = cells[3].inner_text
      waypoint = train.waypoints["#{station_name}@#{deptime}"] || train.waypoints["#{station_name}@#{arrtime}"]
      if waypoint
        waypoint.arrtime = arrtime
        waypoint.deptime = deptime
        waypoint.trackno = trackno
      end
    end
  end
end

# Tab-separated text output.
class CsvFormatter
  # Returns the timetable as text: a header line of train numbers, then
  # one line per station with an "arrival/departure" cell per train.
  def format_dia(dia)
    res = ""
    dia.trains.each do |train|
      res << "\t#{train.retsuban}"
    end
    res << "\n"
    dia.stations.each do |station|
      res << station.sub(/#\d+$/, '')
      dia.trains.each do |train|
        waypoint = train.waypoints[station]
        if waypoint
          # Only two directives, so only two arguments: a surplus argument
          # makes Kernel#format raise ArgumentError when $DEBUG is set.
          res << format("\t%4s/%4s", waypoint.arrtime, waypoint.deptime)
        else
          res << "\t "
        end
      end
      res << "\n"
    end
    res
  end
end

# WinDIA file output.
class WindiaFormatter
  attr_accessor :dia

  def initialize(dia)
    @dia = dia
    # to_s: the title stays nil when the page had no title element.
    @upward = (@dia.title.to_s[/\((下り|上り)\)/, 1] == "上り")
    if @upward
      @stations = @dia.stations.reverse # station order is fixed to the down direction
    else
      @stations = @dia.stations
    end
  end

  # Suggested output file name: "<yymmdd>_<title>_<section>.dia" with all
  # whitespace removed.
  def filename
    timestamp = @dia.timestamp.strftime('%y%m%d')
    "#{timestamp}_#{@dia.title}_#{@dia.section}.dia".gsub(/\s+/, '')
  end

  # Returns the whole WinDIA document as a Shift_JIS string with CRLF line
  # endings. +precision+ > 0 marks every station to show both arrival and
  # departure times.
  def format(precision)
    timestamp = @dia.timestamp.strftime('%Y年%m月%d日閲覧')
    res = ""
    res << "[WinDIA]\n"
    res << "#{@dia.title} #{@dia.section} #{timestamp}\n"
    res << "#{@dia.source}\n\n"

    # Stations
    res << "[駅]\n"
    @stations.each_with_index do |station, i|
      res << "p" if i == 0 || i == @stations.size - 1 # p: major station
      res << "b" if precision.to_i > 0                # b: show arrival and departure times
      res << ","
      res << station.sub(/#\d+$/, '')
      res << "\n"
    end
    res << "\n"

    # Line styles
    res << "[線種]\n"
    res << "LINES=112,64,192,192,64,88,92,220,212,224,88,92,64,92,115,108,236,104,104,104,104,104,104,104\n"
    @dia.lkinds.each_index do |i|
      ii = "%02d" % i
      lk = @dia.lkinds[i]
      sk = @dia.skinds[i]
      res << "Train#{ii}=#{lk},#{sk}\n" if lk
    end
    res << "\n"

    # Trains
    if @upward
      res << "[上り]\n"
    else
      res << "[下り]\n"
    end
    @dia.trains.each do |train|
      res << format_train(train) << "\n"
    end
    res.gsub(/\n/, "\r\n").tosjis
  end

  # Returns one train as a WinDIA record: kind index, train number,
  # nickname, number, then one comma-terminated field per station, with a
  # line continuation every 12 stations.
  def format_train(train)
    r = ""
    kind = @dia.guess_kind_index(train.shubetsu)
    r << "#{kind},"           # kind
    r << "#{train.retsuban}," # train number
    r << "#{train.aisho},"    # nickname
    r << "#{train.gousu},"    # number
    @stations.each_with_index do |station, i|
      r << "\\\n\t" if i > 0 && i % 12 == 0
      waypoint = train.waypoints[station]
      if waypoint
        r << "#{waypoint.arrtime.sub(/^0/, '')}/" if waypoint.arrtime
        r << waypoint.deptime.sub(/^0/, '') if waypoint.deptime
      end
      # Always terminate the field, even for a waypoint without any time;
      # otherwise every following station shifts one column to the left.
      r << ","
    end
    r << "\\\n\t%#{train.kukan} #{train.bikou}" if train.bikou
    r
  end
end

# Main
begin
  input = ToretabiScraper.new
  input.load(parameters, precision)
  output = WindiaFormatter.new(input.dia)
  # File.open, not Kernel#open: the path comes from the command line and
  # Kernel#open would run a path starting with "|" as a command.
  # Binary mode: the text is already Shift_JIS with CRLF line endings, so
  # no newline translation may be applied on top of it.
  File.open(outputfile, 'wb') do |f|
    f.puts(output.format(precision))
  end
  puts(output.filename)
rescue StandardError => e
  # Report on stderr and fail the process so callers can detect the error.
  $stderr.puts(e)
  $stderr.puts(e.backtrace)
  exit 1
end

__END__

# Experiment: automatic navigation starting from the Toretabi top page

# First page = station names / search conditions
agent.get("http://jikoku.toretabi.jp/cgi-bin/tra.cgi/cond")
agent.page.form_with(:action => %r!/tra\.cgi/cond!) { |form|
  form.field_with(:name => 'S1').value = 'kimitsu' # departure station name
  form.field_with(:name => 'S2').value = 'tateyama' # arrival station name
  form.field_with(:name => 'RL').value = '' # line name
  form.field_with(:name => 'TR').value = '' # train name
  form.field_with(:name => 'YY').value = '2010' # year
  form.field_with(:name => 'M').value = '2' # month
  form.field_with(:name => 'D').value = '27' # day
  form.checkbox_with(:name => 'TC', :value => 'S').check # Shinkansen
  form.checkbox_with(:name => 'TC', :value => 'L').check # limited express
  form.checkbox_with(:name => 'TC', :value => 'E').check # express
  form.checkbox_with(:name => 'TC', :value => 'R').check # rapid
  form.checkbox_with(:name => 'TC', :value => 'B').check # sleeper
  form.checkbox_with(:name => 'TC', :value => 'N').check # local
  form.radiobutton_with(:name => 'VT', :value => 'T').check # T: train timetable, D: station timetable
  sleep(1)
  form.click_button
}
# http://jikoku.toretabi.jp/cgi-bin/tra.cgi/cond?S1=kimitsu&S2=tateyama&TC=SLERBN&VT=T
# S1=departure station name
# S2=arrival station name

# Next page = station code selection
agent.get("http://jikoku.toretabi.jp/cgi-bin/tra.cgi/cond")
agent.page.form_with(:action => %r!/tra\.cgi/cond!) { |form|
  form.field_with(:name => 'S1').value = '22264' # departure station code
  form.field_with(:name => 'S2').value = '22350' # arrival station code
  form.click_button
}
# http://jikoku.toretabi.jp/cgi-bin/tra.cgi/cond?S1=22264&S2=22350&TC=SLERBN&VT=T
# S1=departure station code
# S2=arrival station code

# Next page = line selection
agent.page.form_with(:action => %r!/tra\.cgi/tra-tt!) { |form|
  form.field_with(:name => 'PG').value = '1' # choice number
  sleep(1)
  form.click_button
}
# http://jikoku.toretabi.jp/cgi-bin/tra.cgi/tra-tt?S1=22264&S2=22350&TC=SLERBN&VT=T&PG=1&TT=1
# PG=candidate number
# TT=fixed to 1 (any number seems to work)

# Next page = number of trains to show
agent.page.form_with(:action => %r!/tra\.cgi/tra-tt!) { |form|
  form.field_with(:name => 'DI').value = '5' # number of trains shown
  sleep(1)
  form.click_button
}
# http://jikoku.toretabi.jp/cgi-bin/tra.cgi/tra-tt?S1=22264&S2=22350&TC=SLERBN&VT=T&PG=1&TT=1&DI=100
# DI=number of trains shown

# Next page = departure timetable of all trains
dia = Dia.new
agent.page.search("//select[@name='EX']/option").each do |option|
  dia.stations.push(option.inner_text)
end
visited = Hash.new
loop do
  nextpagelink = agent.page.link_with(:text => %r!次の\d+!)
  agent.page.links_with(:href => %r!/trinf\.cgi/route/trinf!).each do |link|
    ksid = link.href.match(%r!TR=(\d+)!).to_a[1] # Kotsu Shimbunsha train ID
    next if visited[ksid]
    visited[ksid] = true
    sleep(1)
    link.click
    # Next page = arrival/departure timetable of one train
    train = Train.new
    train.ksid = ksid
    agent.page.search("//table[@class='trinf_table_border']//tr")[1..-1].each do |row|
      cells = row.search('./td')
      stop = Waypoint.new
      stop.station = cells[0].inner_text
      stop.arrtime = cells[1].inner_text
      stop.deptime = cells[2].inner_text
      stop.trackno = cells[3].inner_text
      train.stops[stop.station] = stop
    end
    dia.trains.push(train)
  end
  break if not nextpagelink
  # Next page = departure timetable of all trains
  sleep(1)
  nextpagelink.click
  # http://jikoku.toretabi.jp/cgi-bin/tra.cgi/tra-tt?S1=22264&S2=22350&TC=SLERBN&VT=T&PG=1&TT=1&DI=100&PO=10
  # PO=offset
end