Skip to content

Commit

Permalink
DBD-1085 - Add run time info to rake export and import task (#1037)
Browse files Browse the repository at this point in the history
Only for export tasks
  • Loading branch information
fritzfreiheit authored and blancoj committed Aug 10, 2018
1 parent 99eb43d commit 71974fd
Show file tree
Hide file tree
Showing 2 changed files with 164 additions and 46 deletions.
112 changes: 81 additions & 31 deletions lib/tasks/yaml_populate_for_collection.rake
Original file line number Diff line number Diff line change
Expand Up @@ -4,68 +4,118 @@ require 'open-uri'

namespace :umrdr do

# bundle exec rake umrdr:yaml_populate_from_collection[nk322d32h,/deepbluedata-prep,true]
# bundle exec rake deepblue:yaml_populate_from_collection[nk322d32h,/deepbluedata-prep,true]
desc 'Yaml populate from collection'
# See: https://stackoverflow.com/questions/825748/how-to-pass-command-line-arguments-to-a-rake-task
task :yaml_populate_from_collection, %i[ collection_id target_dir export_files ] => :environment do |_task, args|
# puts "upgrade_provenance_log", args.as_json
args.with_defaults( collection_id: 'nk322d32h', target_dir: '/deepbluedata-prep', export_files: 'true' )
task = Umrdr::YamlPopulateFromCollection.new( collection_id: args[:collection_id],
target_dir: args[:target_dir],
export_files: args[:export_files] )
target_dir: args[:target_dir],
export_files: args[:export_files] )
task.run
end

# bundle exec rake umrdr:yaml_populate_from_multiple_collections
desc 'Yaml populate from multiple collections'
task yaml_populate_from_multiple_collections: :environment do
Umrdr::YamlPopulateFromMultipleCollections.run
# bundle exec rake umrdr:yaml_populate_from_multiple_collections['f4752g72m f4752g72m',/deepbluedata-prep,true]
desc 'Yaml populate from multiple collections (ids separated by spaces)'
task :yaml_populate_from_multiple_collections, %i[ collection_ids target_dir export_files ] => :environment do |_task, args|
  # Arguments left off the command line fall back to these defaults;
  # collection_ids has no default and is passed through as given.
  args.with_defaults( target_dir: '/deepbluedata-prep', export_files: 'true' )
  exporter_options = {
    collection_ids: args[:collection_ids],
    target_dir: args[:target_dir],
    export_files: args[:export_files]
  }
  Umrdr::YamlPopulateFromMultipleCollections.new( **exporter_options ).run
end

end

module Umrdr

class YamlPopulateFromCollection
# see: http://ruby-doc.org/stdlib-2.0.0/libdoc/benchmark/rdoc/Benchmark.html
require 'benchmark'
include Benchmark

class YamlPopulateCol

# Prints a Benchmark-style timing table to stdout: a caption row, one row per
# measurement, and (when given) a final 'total' row. Every row ends with the
# elapsed real time rendered by seconds_to_readable.
#
# first_id:     id whose length sets the width of the label column
# measurements: Benchmark::Tms objects; each one's label is used as its row label
# total:        optional Benchmark::Tms for the summary row; no row is printed when nil
#
# Returns nil.
def report( first_id:, measurements:, total: nil )
  header = 'coll id'
  # The label column is never narrower than the header, so an id shorter than
  # the header no longer produces a negative repeat count (ArgumentError).
  width = [ first_id.to_s.size, header.size ].max
  puts "#{header.ljust( width )} #{Benchmark::CAPTION}"
  # FORMAT ends with a newline; drop it so the readable duration can follow on the same line.
  format = Benchmark::FORMAT.chop
  rows = measurements.map { |measurement| [ measurement.label, measurement ] }
  rows << [ 'total', total ] unless total.nil?
  rows.each do |label, tms|
    # The label is interpolated outside Tms#format so a '%' inside an id is
    # printed literally instead of being parsed as a format directive.
    puts "#{label.to_s.ljust( width )} #{tms.format( format )} is #{seconds_to_readable( tms.real )}"
  end
  nil
end

def initialize( collection_id:, target_dir:, export_files: )
@collection_id = collection_id
@target_dir = target_dir
@export_files = export_files.casecmp( 'true' ).zero?
# Renders a duration given in seconds as "H hours, M minutes, and S seconds",
# using the hour/minute/second breakdown returned by split_seconds.
def seconds_to_readable( seconds )
  hours, minutes, secs = split_seconds( seconds ).first( 3 )
  format( '%s hours, %s minutes, and %s seconds', hours, minutes, secs )
end

def run
puts "Exporting collection #{@collection_id} to '#{@target_dir}' with export files flag set to #{@export_files}"
Umrdr::MetadataHelper2.yaml_populate_collection( collection: @collection_id,
dir: @target_dir,
export_files: @export_files )
# Breaks a duration in seconds into [hours, minutes, seconds, original_value].
# The duration is rounded to a whole second (add 0.5, then truncate) before it
# is split; the last element is the argument exactly as it was passed in.
# Hours are not capped at 24, so long runs are reported as e.g. 30 hours.
def split_seconds( fr )
  whole_seconds = ( fr + 0.5 ).to_int
  hours, leftover = whole_seconds.divmod( 3600 )
  [ hours, *leftover.divmod( 60 ), fr ]
end

end

class YamlPopulateFromCollectionTest
class YamlPopulateFromCollection < Umrdr::YamlPopulateCol

def initialize
@collection_id = 'nk322d32h'
@target_dir = '/deepbluedata-prep'
@export_files = true
# Stores the export settings for a single collection.
#
# collection_id: id of the collection to export
# target_dir:    directory handed to the exporter
# export_files:  string flag; stored as true only when it equals 'true' ignoring case
def initialize( collection_id:, target_dir:, export_files: )
  files_requested = export_files.casecmp( 'true' ).zero?
  @collection_id, @target_dir, @export_files = collection_id, target_dir, files_requested
end

def run
Umrdr::MetadataHelper2.yaml_populate_collection( collection: @collection_id,
dir: @target_dir,
export_files: @export_files )
measurement = Benchmark.measure( @collection_id ) do
puts "Exporting collection #{@collection_id} to '#{@target_dir}' with export files flag set to #{@export_files}"
Umrdr::MetadataHelper2.yaml_populate_collection( collection: @collection_id,
dir: @target_dir,
export_files: @export_files )
end
report( first_id: @collection_id, measurements: [measurement] )
end

end

# TODO: parametrize the collection id
# TODO: parametrize the target directory
class YamlPopulateFromMultipleCollections
class YamlPopulateFromMultipleCollections < Umrdr::YamlPopulateCol

# Stores the export settings for a batch of collections.
#
# collection_ids: the ids to export, as one space-separated string
# target_dir:     directory handed to the exporter
# export_files:   string flag; stored as true only when it equals 'true' ignoring case
def initialize( collection_ids:, target_dir:, export_files: )
  files_requested = export_files.casecmp( 'true' ).zero?
  @collection_ids, @target_dir, @export_files = collection_ids, target_dir, files_requested
end

def self.run
ids = [ 'kh04dp82v', '7p88ch00j', '6108vb81z', 'v979v354p', 'x059c7753', 'gf06g3075', 't722h885b', '70795767w', '8p58pc92q', 'x920fx31k', 'j38607392' ]
ids.each { |id| Umrdr::MetadataHelper2.yaml_populate_collection( collection: id, export_files: true ) }
# Exports every collection named in @collection_ids (a space-separated list),
# timing each export with Benchmark.measure, then prints the timing table via
# report, including a total row summed over all exports.
# Returns nil without exporting anything when no ids were supplied.
def run
  # to_s covers the rake argument being omitted (nil), which previously
  # raised NoMethodError on split before the emptiness check could run.
  ids = @collection_ids.to_s.split( ' ' )
  return if ids.empty?
  measurements = ids.map do |id|
    Benchmark.measure( id ) do
      puts "Exporting collection #{id} to '#{@target_dir}' with export files flag set to #{@export_files}"
      Umrdr::MetadataHelper2.yaml_populate_collection( collection: id,
                                                      dir: @target_dir,
                                                      export_files: @export_files )
    end
  end
  # Benchmark::Tms#+ sums the individual times, so folding the list gives the
  # overall timing; with a single id the total is that one measurement.
  total = measurements.reduce( :+ )
  report( first_id: ids.first, measurements: measurements, total: total )
end

end
Expand Down
98 changes: 83 additions & 15 deletions lib/tasks/yaml_populate_for_work.rake
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,63 @@ namespace :umrdr do
# puts "upgrade_provenance_log", args.as_json
args.with_defaults( target_dir: '/deepbluedata-prep', export_files: 'true' )
task = Umrdr::YamlPopulateFromWork.new( work_id: args[:work_id],
target_dir: args[:target_dir],
export_files: args[:export_files] )
target_dir: args[:target_dir],
export_files: args[:export_files] )
task.run
end

# bundle exec rake umrdr:yaml_populate_from_multiple_works
desc 'Yaml populate from multiple works'
task yaml_populate_from_multiple_works: :environment do
Umrdr::YamlPopulateFromMultipleWorks.run
# bundle exec rake umrdr:yaml_populate_from_multiple_works['f4752g72m f4752g72m',/deepbluedata-prep,true]
desc 'Yaml populate from multiple works (ids separated by spaces)'
task :yaml_populate_from_multiple_works, %i[ work_ids target_dir export_files ] => :environment do |_task, args|
  # Arguments left off the command line fall back to these defaults;
  # work_ids has no default and is passed through as given.
  args.with_defaults( target_dir: '/deepbluedata-prep', export_files: 'true' )
  exporter_options = {
    work_ids: args[:work_ids],
    target_dir: args[:target_dir],
    export_files: args[:export_files]
  }
  Umrdr::YamlPopulateFromMultipleWorks.new( **exporter_options ).run
end

end

module Umrdr

class YamlPopulateFromWork
# see: http://ruby-doc.org/stdlib-2.0.0/libdoc/benchmark/rdoc/Benchmark.html
require 'benchmark'
include Benchmark

# Shared timing-report helpers for the work export tasks. Subclasses collect
# Benchmark::Tms measurements and hand them to #report for printing.
class YamlPopulate

  # Prints a Benchmark-style timing table to stdout: a caption row, one row
  # per measurement, and (when given) a final 'total' row. Every row ends with
  # the elapsed real time in hours, minutes and seconds.
  #
  # first_id:     id whose length sets the width of the label column
  # measurements: Benchmark::Tms objects; each one's label is used as its row label
  # total:        optional Benchmark::Tms for the summary row; no row is printed when nil
  #
  # Returns nil.
  def report( first_id:, measurements:, total: nil )
    header = 'work id'
    # The label column is never narrower than the header, so an id shorter
    # than the header no longer produces a negative repeat count (ArgumentError).
    width = [ first_id.to_s.size, header.size ].max
    puts "#{header.ljust( width )} #{Benchmark::CAPTION}"
    # FORMAT ends with a newline; drop it so the readable duration can follow on the same line.
    format = Benchmark::FORMAT.chop
    rows = measurements.map { |measurement| [ measurement.label, measurement ] }
    rows << [ 'total', total ] unless total.nil?
    rows.each do |label, tms|
      # The label is interpolated outside Tms#format so a '%' inside an id is
      # printed literally instead of being parsed as a format directive.
      puts "#{label.to_s.ljust( width )} #{tms.format( format )} is #{seconds_to_readable( tms.real )}"
    end
    nil
  end

  # Renders a duration in seconds as "H hours, M minutes, and S seconds".
  def seconds_to_readable( seconds )
    h, min, s, _fr = split_seconds( seconds )
    "#{h} hours, #{min} minutes, and #{s} seconds"
  end

  # Breaks a duration in seconds into [hours, minutes, seconds, original_value].
  # The duration is rounded to a whole second (add 0.5, then truncate) before
  # it is split; hours are not capped at 24.
  def split_seconds( fr )
    ss = ( fr + 0.5 ).to_int
    h, ss = ss.divmod( 3600 )
    min, s = ss.divmod( 60 )
    [ h, min, s, fr ]
  end

end

class YamlPopulateFromWork < Umrdr::YamlPopulate

def initialize( work_id:, target_dir:, export_files: )
@work_id = work_id
Expand All @@ -35,19 +76,46 @@ module Umrdr
end

def run
puts "Exporting work #{@work_id} to '#{@target_dir}' with export files flag set to #{@export_files}"
Umrdr::MetadataHelper2.yaml_populate_work( curation_concern: @work_id, dir: @target_dir, export_files: @export_files )
measurement = Benchmark.measure( @work_id ) do
puts "Exporting work #{@work_id} to '#{@target_dir}' with export files flag set to #{@export_files}"
Umrdr::MetadataHelper2.yaml_populate_work( curation_concern: @work_id,
dir: @target_dir,
export_files: @export_files )
end
report( first_id: @work_id, measurements: [measurement] )
end

end

# TODO: parametrize the work id
# TODO: parametrize the target directory
class YamlPopulateFromMultipleWorks
class YamlPopulateFromMultipleWorks < Umrdr::YamlPopulate

def self.run
ids = [ 'kh04dp82v', '7p88ch00j', '6108vb81z', 'v979v354p', 'x059c7753', 'gf06g3075', 't722h885b', '70795767w', '8p58pc92q', 'x920fx31k', 'j38607392' ]
ids.each { |id| Umrdr::MetadataHelper2.yaml_populate_work( curation_concern: id, export_files: true ) }
# Stores the export settings for a batch of works.
#
# work_ids:     the ids to export, as one space-separated string
# target_dir:   directory handed to the exporter
# export_files: string flag; stored as true only when it equals 'true' ignoring case
def initialize( work_ids:, target_dir:, export_files: )
  files_requested = export_files.casecmp( 'true' ).zero?
  @work_ids, @target_dir, @export_files = work_ids, target_dir, files_requested
end

# Exports every work named in @work_ids (a space-separated list), timing each
# export with Benchmark.measure, then prints the timing table via report,
# including a total row summed over all exports.
# Returns nil without exporting anything when no ids were supplied.
def run
  # to_s covers the rake argument being omitted (nil), which previously
  # raised NoMethodError on split before the emptiness check could run.
  ids = @work_ids.to_s.split( ' ' )
  return if ids.empty?
  measurements = ids.map do |id|
    Benchmark.measure( id ) do
      puts "Exporting work #{id} to '#{@target_dir}' with export files flag set to #{@export_files}"
      Umrdr::MetadataHelper2.yaml_populate_work( curation_concern: id,
                                                dir: @target_dir,
                                                export_files: @export_files )
    end
  end
  # Benchmark::Tms#+ sums the individual times, so folding the list gives the
  # overall timing; with a single id the total is that one measurement.
  total = measurements.reduce( :+ )
  report( first_id: ids.first, measurements: measurements, total: total )
end

end
Expand Down

0 comments on commit 71974fd

Please sign in to comment.