Skip to content

Commit

Permalink
Merge pull request #23 from mlibrary/sorting-and-docs
Browse files Browse the repository at this point in the history
Sorting and docs
  • Loading branch information
niquerio authored Dec 18, 2023
2 parents 2f7019c + 10c0e4a commit 5d85722
Show file tree
Hide file tree
Showing 26 changed files with 232 additions and 99 deletions.
51 changes: 0 additions & 51 deletions database_strategy.txt

This file was deleted.

6 changes: 6 additions & 0 deletions lib/authority_browse/db/names.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
module AuthorityBrowse
class DB
class Names < AuthorityBrowse::DB
# Tables for names for AuthorityBrowse
#
# @return [Hash]
def self.database_definitions
{
names: proc do
Expand All @@ -23,6 +26,9 @@ def self.database_definitions
}
end

# Sets indexes on the :names and :names_see_also tables
#
# @return [Nil]
def self.set_names_indexes!
AuthorityBrowse.db.alter_table(:names) do
add_index :id
Expand Down
6 changes: 6 additions & 0 deletions lib/authority_browse/db/subjects.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
module AuthorityBrowse
class DB
class Subjects < AuthorityBrowse::DB
# Tables for subjects for AuthorityBrowse
#
# @return [Hash]
def self.database_definitions
{
subjects: proc do
Expand All @@ -24,6 +27,9 @@ def self.database_definitions
}
end

# Sets indexes on the :subjects and :subjects_xrefs tables
#
# @return [Nil]
def self.set_subjects_indexes!
AuthorityBrowse.db.alter_table(:subjects) do
add_index :id
Expand Down
17 changes: 17 additions & 0 deletions lib/authority_browse/db_mutator/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,20 @@ module AuthorityBrowse
class DBMutator
class Base
class << self
# Resets the count column to 0 in the main table. The update is issued
# without any filter and runs inside a database transaction.
#
# @return [Object] whatever the transaction call returns
#   NOTE(review): with Sequel this is presumably the number of updated rows
#   rather than nil — confirm before relying on the return value.
def zero_out_counts
  AuthorityBrowse.db.transaction { AuthorityBrowse.db[main_table].update(count: 0) }
end

# Updates the main table with counts from the from_biblio table.
# The match between the tables happens on the `match_text` fields
# in both tables.
#
# @return [Nil]
def update_main_with_counts
statement = <<~SQL.strip
UPDATE #{main_table} AS m
Expand All @@ -20,6 +28,10 @@ def update_main_with_counts
AuthorityBrowse.db.run(statement)
end

# Removes deprecated terms in the main table when there is an
# undeprecated term with the same match text.
#
# @return [Nil]
def remove_deprecated_when_undeprecated_match_text_exists
statement = <<~SQL.strip
DELETE FROM #{main_table}
Expand All @@ -33,6 +45,11 @@ def remove_deprecated_when_undeprecated_match_text_exists
AuthorityBrowse.db.run(statement)
end

# Updates the from_biblio table with ids of matching entries in the
# main_table. This enables determining the list of unmatched entries in
# the from_biblio table
#
# @return [Nil]
def add_ids_to_from_biblio
statement = <<~SQL.strip
UPDATE #{from_biblio_table} AS fb
Expand Down
9 changes: 9 additions & 0 deletions lib/authority_browse/db_mutator/names.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,31 @@ module AuthorityBrowse
class DBMutator
class Names < Base
class << self
# Names-specific entry point that delegates to update_main_with_counts.
# This is a wrapper method, not a Ruby `alias`.
#
# @return [nil] per the Base documentation of update_main_with_counts
def update_names_with_counts
update_main_with_counts
end

# Names-specific entry point that delegates to add_ids_to_from_biblio.
# This is a wrapper method, not a Ruby `alias`.
#
# @return [nil] per the Base documentation of add_ids_to_from_biblio
def add_ids_to_names_from_biblio
add_ids_to_from_biblio
end

# Identifier symbol used for names; presumably the from_biblio column that
# stores the id of the matching main-table row — confirm against Base.
#
# @return [Symbol] always :name_id
def main_id
:name_id
end

# Name of the main table that the Base mutator methods operate on for names.
#
# @return [Symbol] always :names
def main_table
:names
end

# Name of the from_biblio table that the Base mutator methods operate on
# for names.
#
# @return [Symbol] always :names_from_biblio
def from_biblio_table
:names_from_biblio
end
Expand Down
9 changes: 9 additions & 0 deletions lib/authority_browse/db_mutator/subjects.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,31 @@ module AuthorityBrowse
class DBMutator
class Subjects < Base
class << self
# Subjects-specific entry point that delegates to update_main_with_counts.
# This is a wrapper method, not a Ruby `alias`.
#
# @return [nil] per the Base documentation of update_main_with_counts
def update_subjects_with_counts
update_main_with_counts
end

# Subjects-specific entry point that delegates to add_ids_to_from_biblio.
# This is a wrapper method, not a Ruby `alias`.
#
# @return [nil] per the Base documentation of add_ids_to_from_biblio
def add_ids_to_subjects_from_biblio
add_ids_to_from_biblio
end

# Identifier symbol used for subjects; presumably the from_biblio column
# that stores the id of the matching main-table row — confirm against Base.
#
# @return [Symbol] always :subject_id
def main_id
:subject_id
end

# Name of the main table that the Base mutator methods operate on for
# subjects.
#
# @return [Symbol] always :subjects
def main_table
:subjects
end

# Name of the from_biblio table that the Base mutator methods operate on
# for subjects.
#
# @return [Symbol] always :subjects_from_biblio
def from_biblio_table
:subjects_from_biblio
end
Expand Down
3 changes: 3 additions & 0 deletions lib/authority_browse/loc_authorities/entry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ def id
@id ||= "http://id.loc.gov#{@data["@id"]}"
end

# Finds the first element of the "@graph" array whose "@id" equals this
# entry's id. A truthy result is cached in @main_component.
#
# @return [Hash, nil] the matching "@graph" component, or nil when no
#   element has a matching "@id"
def main_component
  @main_component ||= begin
    graph = @data["@graph"]
    graph.find { |node| node["@id"] == id }
  end
end

# Preferred label of the entry. This implementation only raises;
# presumably subclasses are expected to override it — confirm.
#
# @return [String] Preferred Label
# @raise [NotImplementedError] always, in this implementation
def label
raise NotImplementedError
end
Expand All @@ -26,6 +28,7 @@ def match_text
AuthorityBrowse::Normalize.match_text(label)
end

# Checks the "@graph" elements for a deprecation marker.
#
# @return [Boolean] true when at least one "@graph" element has a
#   "cs:changeReason" equal to "deprecated"
def deprecated?
  graph = @data["@graph"]
  graph.any? { |component| component["cs:changeReason"] == "deprecated" }
end
Expand Down
2 changes: 1 addition & 1 deletion lib/authority_browse/loc_authorities/name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def label
main_component["skos:prefLabel"] || main_component["skosxl:literalForm"]
end

# @return [Array] [Array of strings of see_also_ids]
# Ids of the "see also" cross references, obtained by calling
# _get_xref_ids with "rdfs:seeAlso". A truthy result is cached in
# @see_also_ids.
#
# @return [Array<String>] ids of see also xrefs
def see_also_ids
  return @see_also_ids if @see_also_ids

  @see_also_ids = _get_xref_ids("rdfs:seeAlso")
end
Expand Down
4 changes: 4 additions & 0 deletions lib/authority_browse/loc_authorities/subject.rb
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
module AuthorityBrowse
module LocAuthorities
class Subject < Entry
# Preferred label of the subject: the "@value" under "skos:prefLabel" in the
# main component, falling back to the "@value" under "skosxl:literalForm".
#
# @return [String, nil] Preferred Label; nil when main_component is nil or
#   has neither value
def label
  preferred = main_component&.dig("skos:prefLabel", "@value")
  return preferred if preferred

  main_component&.dig("skosxl:literalForm", "@value")
end

# Ids of the broader cross references, obtained by calling _get_xref_ids
# with "skos:broader". A truthy result is cached in @broader_ids.
#
# @return [Array<String>] ids of broader xrefs
def broader_ids
  return @broader_ids if @broader_ids

  @broader_ids = _get_xref_ids("skos:broader")
end

# Ids of the narrower cross references, obtained by calling _get_xref_ids
# with "skos:narrower". A truthy result is cached in @narrower_ids.
#
# @return [Array<String>] ids of narrower xrefs
def narrower_ids
  return @narrower_ids if @narrower_ids

  @narrower_ids = _get_xref_ids("skos:narrower")
end

# Reports whether the subject has any narrower or broader cross references.
#
# @return [Boolean] true unless both narrower_ids and broader_ids are empty
def xref_ids?
  !narrower_ids.empty? || !broader_ids.empty?
end
Expand Down
29 changes: 23 additions & 6 deletions lib/authority_browse/names.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@
module AuthorityBrowse
class Names < Base
class << self
# Identifies which kind of authority this class handles.
#
# @return [String] always "name"
def kind
"name"
end

# Loads the names and names_see_also table with data from loc
# Loads the names and names_see_also table with data from LOC
#
# @param loc_file_getter [Proc] when called needs to put a file with skos
# data into skos_file
# @return [Nil]
def reset_db(loc_file_getter = lambda { fetch_skos_file })
# get names file
loc_file_getter.call
Expand Down Expand Up @@ -57,9 +62,11 @@ def reset_db(loc_file_getter = lambda { fetch_skos_file })
end
end

# Loads solr with documents of names that match data from library of
# congress.
# Loads solr with documents of names that match data from Library of
# Congress.
#
# @param solr_uploader [Solr::Uploader]
# @return [Nil]
def load_solr_with_matched(solr_uploader = Solr::Uploader.new(collection: "authority_browse_reindex"))
write_docs do |out, milemarker|
AuthorityBrowse.db.fetch(get_matched_query).stream.chunk_while { |bef, aft| aft[:id] == bef[:id] }.each do |ary|
Expand All @@ -71,9 +78,11 @@ def load_solr_with_matched(solr_uploader = Solr::Uploader.new(collection: "autho
solr_uploader.send_file_to_solr(solr_docs_file)
end

# Loads solr with documents of names that don't match entries in library
# of congress
# Loads solr with documents of names that don't match entries in Library
# of Congress
#
# @param solr_uploader [Solr::Uploader]
# @return [Nil]
def load_solr_with_unmatched(solr_uploader = Solr::Uploader.new(collection: "authority_browse_reindex"))
write_docs do |out, milemarker|
AuthorityBrowse.db[:names_from_biblio].stream.filter(name_id: nil).where { count > 0 }.each do |name|
Expand All @@ -87,7 +96,7 @@ def load_solr_with_unmatched(solr_uploader = Solr::Uploader.new(collection: "aut
# Sequel query that gets names and see alsos with their counts
#
# Private method
# return [String]
# @return [String]
def get_matched_query
<<~SQL.strip
SELECT names.id,
Expand All @@ -104,20 +113,28 @@ def get_matched_query
SQL
end

# Name of the field (facet) in Biblio for which counts should be fetched.
#
# @return [String] always "author_authoritative_browse"
def field_name
"author_authoritative_browse"
end

# URL of the gzipped names skos JSON-LD file on id.loc.gov (Library of
# Congress).
#
# @return [String]
def remote_skos_file
"https://id.loc.gov/download/authorities/names.skosrdf.jsonld.gz"
end

# Relative path of the local file holding the Library of Congress names skos
# data (presumably where the download from remote_skos_file is saved —
# confirm against fetch_skos_file).
#
# @return [String]
def local_skos_file
"tmp/names.skosrdf.jsonld.gz"
end

# Name of the database table holding the names that come from Biblio.
#
# @return [Symbol] always :names_from_biblio
def from_biblio_table
:names_from_biblio
end
Expand Down
20 changes: 9 additions & 11 deletions lib/authority_browse/normalize.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,13 @@ module Normalize
# We want it to match solr because we need to generate a search string that will find all the stuff
# in the catalog we're claiming it should find.

# PUNCT_SPACE_COMBO = /(?:\p{P}+(?:\s+|\Z))|(?:(?:\A|\s+)\p{P}+)/
UNNECESSARY_ENDING_PUNCT = /[\/.;,]+\Z/

# not sure this is used anywhere
# For a sort key, we want to eliminate punctuation in general.
# However, things that act like a space between words should
# be turned into spaces.

# This should match as exactly as possible the fieldType authority_search

WHICH_PUNCT_TO_SPACIFY = /[:-]+/
EMPTY_STRING = ""
ONE_SPACE = " "
# this is used
# Return the appropriate match text for a given string
#
# @param str [String] String to be normalized
# @return [String] Normalized string
def match_text(str)
str = unicode_normalize(str)
str.gsub!(/\Athe\s+/, EMPTY_STRING)
Expand All @@ -41,6 +34,11 @@ def match_text(str)
cleanup_spaces(str)
end

# Collapses every run of whitespace (as matched by /\s+/) into a single
# space, then strips leading and trailing whitespace. The argument is not
# modified; a new string is returned.
#
# @param str [String] String with spaces
# @return [String] String with appropriate number of spaces
def cleanup_spaces(str)
  collapsed = str.gsub(/\s+/, ONE_SPACE)
  collapsed.strip
end
Expand Down
Loading

0 comments on commit 5d85722

Please sign in to comment.