From 71974fd219692329993f958890e9333f3495c982 Mon Sep 17 00:00:00 2001 From: Fritz Freiheit Date: Fri, 10 Aug 2018 10:08:34 -0400 Subject: [PATCH] DBD-1085 - Add run time info to rake export and import task (#1037) Only for export tasks --- lib/tasks/yaml_populate_for_collection.rake | 112 ++++++++++++++------ lib/tasks/yaml_populate_for_work.rake | 98 ++++++++++++++--- 2 files changed, 164 insertions(+), 46 deletions(-) diff --git a/lib/tasks/yaml_populate_for_collection.rake b/lib/tasks/yaml_populate_for_collection.rake index c986d67a..43917a64 100644 --- a/lib/tasks/yaml_populate_for_collection.rake +++ b/lib/tasks/yaml_populate_for_collection.rake @@ -4,68 +4,118 @@ require 'open-uri' namespace :umrdr do - # bundle exec rake umrdr:yaml_populate_from_collection[nk322d32h,/deepbluedata-prep,true] + # bundle exec rake deepblue:yaml_populate_from_collection[nk322d32h,/deepbluedata-prep,true] desc 'Yaml populate from collection' # See: https://stackoverflow.com/questions/825748/how-to-pass-command-line-arguments-to-a-rake-task task :yaml_populate_from_collection, %i[ collection_id target_dir export_files ] => :environment do |_task, args| # puts "upgrade_provenance_log", args.as_json args.with_defaults( collection_id: 'nk322d32h', target_dir: '/deepbluedata-prep', export_files: 'true' ) task = Umrdr::YamlPopulateFromCollection.new( collection_id: args[:collection_id], - target_dir: args[:target_dir], - export_files: args[:export_files] ) + target_dir: args[:target_dir], + export_files: args[:export_files] ) task.run end - # bundle exec rake umrdr:yaml_populate_from_multiple_collections - desc 'Yaml populate from multiple collections' - task yaml_populate_from_multiple_collections: :environment do - Umrdr::YamlPopulateFromMultipleCollections.run + # bundle exec rake umrdr:yaml_populate_from_multiple_collections['f4752g72m f4752g72m',/deepbluedata-prep,true] + desc 'Yaml populate from multiple collections (ids separated by spaces)' + task :yaml_populate_from_multiple_collections, %i[ collection_ids target_dir export_files ] => :environment do |_task, args| + # puts "upgrade_provenance_log", args.as_json + args.with_defaults( target_dir: '/deepbluedata-prep', export_files: 'true' ) + task = Umrdr::YamlPopulateFromMultipleCollections.new( collection_ids: args[:collection_ids], + target_dir: args[:target_dir], + export_files: args[:export_files] ) + task.run end end module Umrdr - class YamlPopulateFromCollection + # see: http://ruby-doc.org/stdlib-2.0.0/libdoc/benchmark/rdoc/Benchmark.html + require 'benchmark' + include Benchmark + + class YamlPopulateCol + + def report( first_id:, measurements:, total: nil ) + label = 'coll id' + label = label + ' ' * (first_id.size - label.size) + puts "#{label} #{Benchmark::CAPTION}" + format = Benchmark::FORMAT.chop + measurements.each do |measurement| + label = measurement.label + puts measurement.format( "#{label} #{format} is #{seconds_to_readable(measurement.real)}\n" ) + end + return if total.blank? + label = 'total' + label = label + ' ' * (first_id.size - label.size) + puts total.format( "#{label} #{format} is #{seconds_to_readable(total.real)}\n" ) + end - def initialize( collection_id:, target_dir:, export_files: ) - @collection_id = collection_id - @target_dir = target_dir - @export_files = export_files.casecmp( 'true' ).zero? + def seconds_to_readable( seconds ) + h,min,s,_fr = split_seconds( seconds ) + return "#{h} hours, #{min} minutes, and #{s} seconds" end - def run - puts "Exporting collection #{@collection_id} to '#{@target_dir}' with export files flag set to #{@export_files}" - Umrdr::MetadataHelper2.yaml_populate_collection( collection: @collection_id, - dir: @target_dir, - export_files: @export_files ) + def split_seconds( fr ) + #ss, fr = fr.divmod(86_400) # 4p + ss = ( fr + 0.5 ).to_int + h, ss = ss.divmod(3600) + min, s = ss.divmod(60) + return h, min, s, fr end end - class YamlPopulateFromCollectionTest + class YamlPopulateFromCollection < Umrdr::YamlPopulateCol - def initialize - @collection_id = 'nk322d32h' - @target_dir = '/deepbluedata-prep' - @export_files = true + def initialize( collection_id:, target_dir:, export_files: ) + @collection_id = collection_id + @target_dir = target_dir + @export_files = export_files.casecmp( 'true' ).zero? end def run - Umrdr::MetadataHelper2.yaml_populate_collection( collection: @collection_id, - dir: @target_dir, - export_files: @export_files ) + measurement = Benchmark.measure( @collection_id ) do + puts "Exporting collection #{@collection_id} to '#{@target_dir}' with export files flag set to #{@export_files}" + Umrdr::MetadataHelper2.yaml_populate_collection( collection: @collection_id, + dir: @target_dir, + export_files: @export_files ) + end + report( first_id: @collection_id, measurements: [measurement] ) end end - # TODO: parametrize the collection id - # TODO: parametrize the target directory - class YamlPopulateFromMultipleCollections + class YamlPopulateFromMultipleCollections < Umrdr::YamlPopulateCol + + def initialize( collection_ids:, target_dir:, export_files: ) + @collection_ids = collection_ids + @target_dir = target_dir + @export_files = export_files.casecmp( 'true' ).zero? + end - def self.run - ids = [ 'kh04dp82v', '7p88ch00j', '6108vb81z', 'v979v354p', 'x059c7753', 'gf06g3075', 't722h885b', '70795767w', '8p58pc92q', 'x920fx31k', 'j38607392' ] - ids.each { |id| Umrdr::MetadataHelper2.yaml_populate_collection( collection: id, export_files: true ) } + def run + ids = @collection_ids.split( ' ' ) + return if ids.blank? + first_id = ids[0] + total = nil + measurements = [] + ids.each do |id| + subtotal = Benchmark.measure( id ) do + puts "Exporting collection #{id} to '#{@target_dir}' with export files flag set to #{@export_files}" + Umrdr::MetadataHelper2.yaml_populate_collection( collection: id, + dir: @target_dir, + export_files: @export_files ) + end + measurements << subtotal + if total.nil? + total = subtotal + else + total += subtotal + end + end + report( first_id: first_id, measurements: measurements, total: total ) end end diff --git a/lib/tasks/yaml_populate_for_work.rake b/lib/tasks/yaml_populate_for_work.rake index 6c980b68..7e50ba64 100644 --- a/lib/tasks/yaml_populate_for_work.rake +++ b/lib/tasks/yaml_populate_for_work.rake @@ -11,22 +11,63 @@ namespace :umrdr do # puts "upgrade_provenance_log", args.as_json args.with_defaults( target_dir: '/deepbluedata-prep', export_files: 'true' ) task = Umrdr::YamlPopulateFromWork.new( work_id: args[:work_id], - target_dir: args[:target_dir], - export_files: args[:export_files] ) + target_dir: args[:target_dir], + export_files: args[:export_files] ) task.run end - # bundle exec rake umrdr:yaml_populate_from_multiple_works - desc 'Yaml populate from multiple works' - task yaml_populate_from_multiple_works: :environment do - Umrdr::YamlPopulateFromMultipleWorks.run + # bundle exec rake umrdr:yaml_populate_from_multiple_works['f4752g72m f4752g72m',/deepbluedata-prep,true] + desc 'Yaml populate from multiple works (ids separated by spaces)' + task :yaml_populate_from_multiple_works, %i[ work_ids target_dir export_files ] => :environment do |_task, args| + # puts "upgrade_provenance_log", args.as_json + args.with_defaults( target_dir: '/deepbluedata-prep', export_files: 'true' ) + task = Umrdr::YamlPopulateFromMultipleWorks.new( work_ids: args[:work_ids], + target_dir: args[:target_dir], + export_files: args[:export_files] ) + task.run end end module Umrdr - class YamlPopulateFromWork + # see: http://ruby-doc.org/stdlib-2.0.0/libdoc/benchmark/rdoc/Benchmark.html + require 'benchmark' + include Benchmark + + class YamlPopulate + + def report( first_id:, measurements:, total: nil ) + label = 'work id' + label = label + ' ' * (first_id.size - label.size) + puts "#{label} #{Benchmark::CAPTION}" + format = Benchmark::FORMAT.chop + measurements.each do |measurement| + label = measurement.label + puts measurement.format( "#{label} #{format} is #{seconds_to_readable(measurement.real)}\n" ) + end + return if total.blank? + label = 'total' + label = label + ' ' * (first_id.size - label.size) + puts total.format( "#{label} #{format} is #{seconds_to_readable(total.real)}\n" ) + end + + def seconds_to_readable( seconds ) + h,min,s,_fr = split_seconds( seconds ) + return "#{h} hours, #{min} minutes, and #{s} seconds" + end + + def split_seconds( fr ) + #ss, fr = fr.divmod(86_400) # 4p + ss = ( fr + 0.5 ).to_int + h, ss = ss.divmod(3600) + min, s = ss.divmod(60) + return h, min, s, fr + end + + end + + class YamlPopulateFromWork < Umrdr::YamlPopulate def initialize( work_id:, target_dir:, export_files: ) @work_id = work_id @@ -35,19 +76,46 @@ module Umrdr end def run - puts "Exporting work #{@work_id} to '#{@target_dir}' with export files flag set to #{@export_files}" - Umrdr::MetadataHelper2.yaml_populate_work( curation_concern: @work_id, dir: @target_dir, export_files: @export_files ) + measurement = Benchmark.measure( @work_id ) do + puts "Exporting work #{@work_id} to '#{@target_dir}' with export files flag set to #{@export_files}" + Umrdr::MetadataHelper2.yaml_populate_work( curation_concern: @work_id, + dir: @target_dir, + export_files: @export_files ) + end + report( first_id: @work_id, measurements: [measurement] ) end end - # TODO: parametrize the work id - # TODO: parametrize the target directory - class YamlPopulateFromMultipleWorks + class YamlPopulateFromMultipleWorks < Umrdr::YamlPopulate - def self.run - ids = [ 'kh04dp82v', '7p88ch00j', '6108vb81z', 'v979v354p', 'x059c7753', 'gf06g3075', 't722h885b', '70795767w', '8p58pc92q', 'x920fx31k', 'j38607392' ] - ids.each { |id| Umrdr::MetadataHelper2.yaml_populate_work( curation_concern: id, export_files: true ) } + def initialize( work_ids:, target_dir:, export_files: ) + @work_ids = work_ids + @target_dir = target_dir + @export_files = export_files.casecmp( 'true' ).zero? + end + + def run + ids = @work_ids.split( ' ' ) + return if ids.blank? + first_id = ids[0] + total = nil + measurements = [] + ids.each do |id| + subtotal = Benchmark.measure( id ) do + puts "Exporting work #{id} to '#{@target_dir}' with export files flag set to #{@export_files}" + Umrdr::MetadataHelper2.yaml_populate_work( curation_concern: id, + dir: @target_dir, + export_files: @export_files ) + end + measurements << subtotal + if total.nil? + total = subtotal + else + total += subtotal + end + end + report( first_id: first_id, measurements: measurements, total: total ) end end