From 36c68128b4c70a7d194492073af02f11bdde8e62 Mon Sep 17 00:00:00 2001 From: Difegue Date: Tue, 29 Aug 2023 21:16:00 +0200 Subject: [PATCH] Use trim instead of remove_spaces everywhere And move remove_newlines and trim_url to Utils::String --- lib/LANraragi/Controller/Config.pm | 12 ++-- lib/LANraragi/Model/Archive.pm | 31 ++++++----- lib/LANraragi/Model/Backup.pm | 6 +- lib/LANraragi/Model/Plugins.pm | 6 +- lib/LANraragi/Model/Search.pm | 21 +++---- lib/LANraragi/Model/Stats.pm | 24 ++++---- lib/LANraragi/Model/Upload.pm | 15 ++--- lib/LANraragi/Plugin/Metadata/Eze.pm | 49 +++++++++-------- lib/LANraragi/Plugin/Metadata/Fakku.pm | 12 ++-- lib/LANraragi/Plugin/Metadata/Koushoku.pm | 6 +- lib/LANraragi/Plugin/Metadata/RegexParse.pm | 3 +- lib/LANraragi/Plugin/Scripts/SourceFinder.pm | 2 +- lib/LANraragi/Utils/Database.pm | 58 ++++++++++---------- lib/LANraragi/Utils/Generic.pm | 58 +++++--------------- lib/LANraragi/Utils/Minion.pm | 11 ++-- lib/LANraragi/Utils/String.pm | 45 ++++++++++++--- lib/LANraragi/Utils/Tags.pm | 57 ++++++++++--------- tests/LANraragi/Utils/String.t | 31 +++++------ 18 files changed, 227 insertions(+), 220 deletions(-) diff --git a/lib/LANraragi/Controller/Config.pm b/lib/LANraragi/Controller/Config.pm index 98d1fc1a9..083bc60b8 100644 --- a/lib/LANraragi/Controller/Config.pm +++ b/lib/LANraragi/Controller/Config.pm @@ -1,7 +1,8 @@ package LANraragi::Controller::Config; use Mojo::Base 'Mojolicious::Controller'; -use LANraragi::Utils::Generic qw(generate_themes_header remove_spaces remove_newlines); +use LANraragi::Utils::Generic qw(generate_themes_header); +use LANraragi::Utils::String qw(trim trim_CRLF); use LANraragi::Utils::Database qw(redis_encode save_computed_tagrules); use LANraragi::Utils::TempFolder qw(get_tempsize); use LANraragi::Utils::Tags qw(tags_rules_to_array replace_CRLF restore_CRLF); @@ -127,10 +128,11 @@ sub save_config { # Clean up the user's inputs for non-toggle options and encode for redis insertion foreach my $key ( keys %confhash ) { - remove_spaces( $confhash{$key} ); - remove_newlines( $confhash{$key} ); - $confhash{$key} = redis_encode( $confhash{$key} ); - $self->LRR_LOGGER->debug( "Saving $key with value " . $confhash{$key} ); + my $value = $confhash{$key}; + $value = trim($value); + $value = trim_CRLF($value); + $value = redis_encode($value); + $self->LRR_LOGGER->debug( "Saving $key with value " . $value ); } #for all keys of the hash, add them to the redis config hash with the matching keys. diff --git a/lib/LANraragi/Model/Archive.pm b/lib/LANraragi/Model/Archive.pm index d21554e2b..30e47e480 100644 --- a/lib/LANraragi/Model/Archive.pm +++ b/lib/LANraragi/Model/Archive.pm @@ -14,7 +14,8 @@ use File::Basename; use File::Copy "cp"; use File::Path qw(make_path); -use LANraragi::Utils::Generic qw(remove_spaces remove_newlines render_api_response); +use LANraragi::Utils::Generic qw(render_api_response); +use LANraragi::Utils::String qw(trim trim_CRLF); use LANraragi::Utils::TempFolder qw(get_temp); use LANraragi::Utils::Logging qw(get_logger); use LANraragi::Utils::Archive qw(extract_single_file extract_thumbnail); @@ -27,14 +28,14 @@ use LANraragi::Utils::Database sub get_title($id) { my $logger = get_logger( "Archives", "lanraragi" ); - my $redis = LANraragi::Model::Config->get_redis; + my $redis = LANraragi::Model::Config->get_redis; if ( $id eq "" ) { $logger->debug("No archive ID provided."); return (); } - return redis_decode($redis->hget( $id, "title" )); + return redis_decode( $redis->hget( $id, "title" ) ); } # Functions used when dealing with archives. @@ -57,8 +58,8 @@ sub update_thumbnail { $page = 1 unless $page; my $thumbdir = LANraragi::Model::Config->get_thumbdir; - my $use_jxl = LANraragi::Model::Config->get_jxlthumbpages; - my $format = $use_jxl ? 'jxl' : 'jpg'; + my $use_jxl = LANraragi::Model::Config->get_jxlthumbpages; + my $format = $use_jxl ? 'jxl' : 'jpg'; # Thumbnails are stored in the content directory, thumb subfolder. # Another subfolder with the first two characters of the id is used for FS optimization. @@ -100,9 +101,9 @@ sub serve_thumbnail { my $no_fallback = $self->req->param('no_fallback'); $no_fallback = ( $no_fallback && $no_fallback eq "true" ) || "0"; # Prevent undef warnings by checking the variable first - my $thumbdir = LANraragi::Model::Config->get_thumbdir; - my $use_jxl = LANraragi::Model::Config->get_jxlthumbpages; - my $format = $use_jxl ? 'jxl' : 'jpg'; + my $thumbdir = LANraragi::Model::Config->get_thumbdir; + my $use_jxl = LANraragi::Model::Config->get_jxlthumbpages; + my $format = $use_jxl ? 'jxl' : 'jpg'; my $fallback_format = $format eq 'jxl' ? 'jpg' : 'jxl'; # Thumbnails are stored in the content directory, thumb subfolder. @@ -110,8 +111,8 @@ sub serve_thumbnail { my $subfolder = substr( $id, 0, 2 ); # Check for the page and set the appropriate thumbnail name and fallback thumbnail name - my $thumbbase = ( $page - 1 > 0 ) ? "$thumbdir/$subfolder/$id/$page" : "$thumbdir/$subfolder/$id"; - my $thumbname = "$thumbbase.$format"; + my $thumbbase = ( $page - 1 > 0 ) ? "$thumbdir/$subfolder/$id/$page" : "$thumbdir/$subfolder/$id"; + my $thumbname = "$thumbbase.$format"; my $fallback_thumbname = "$thumbbase.$fallback_format"; # Check if the preferred format thumbnail exists, if not, try the alternate format @@ -123,7 +124,7 @@ sub serve_thumbnail { unless ( -e $thumbname ) { my $job_id = $self->minion->enqueue( thumbnail_task => [ $thumbdir, $id, $page ] => { priority => 0, attempts => 3 } ); - if ( $no_fallback ) { + if ($no_fallback) { $self->render( json => { operation => "serve_thumbnail", @@ -133,11 +134,13 @@ sub serve_thumbnail { status => 202 # 202 Accepted ); } else { + # If the thumbnail doesn't exist, serve the default thumbnail. $self->render_file( filepath => "./public/img/noThumb.png" ); } return; } else { + # Simply serve the thumbnail. $self->render_file( filepath => $thumbname ); } @@ -176,7 +179,7 @@ sub serve_page { # Extract the file from the parent archive if it doesn't exist $logger->debug("Extracting missing file"); - my $redis = LANraragi::Model::Config->get_redis; + my $redis = LANraragi::Model::Config->get_redis; my $archive = $redis->hget( $id, "file" ); $redis->quit(); @@ -249,8 +252,8 @@ sub update_metadata { } # Clean up the user's inputs and encode them. - ( remove_spaces($_) ) for ( $title, $tags ); - ( remove_newlines($_) ) for ( $title, $tags ); + ( $_ = trim($_) ) for ( $title, $tags ); + ( $_ = trim_CRLF($_) ) for ( $title, $tags ); if ( defined $title ) { set_title( $id, $title ); diff --git a/lib/LANraragi/Model/Backup.pm b/lib/LANraragi/Model/Backup.pm index c524b0a54..e186784ad 100644 --- a/lib/LANraragi/Model/Backup.pm +++ b/lib/LANraragi/Model/Backup.pm @@ -9,14 +9,14 @@ use Mojo::JSON qw(decode_json encode_json); use LANraragi::Model::Category; use LANraragi::Utils::Database; -use LANraragi::Utils::Generic qw(remove_newlines); +use LANraragi::Utils::String qw(trim_CRLF); use LANraragi::Utils::Database qw(redis_encode redis_decode invalidate_cache set_title set_tags); use LANraragi::Utils::Logging qw(get_logger); #build_backup_JSON() #Goes through the Redis archive IDs and builds a JSON string containing their metadata. sub build_backup_JSON { - my $redis = LANraragi::Model::Config->get_redis; + my $redis = LANraragi::Model::Config->get_redis; my $logger = get_logger( "Backup/Restore", "lanraragi" ); # Basic structure of the backup object @@ -65,7 +65,7 @@ sub build_backup_JSON { my ( $name, $title, $tags, $thumbhash ) = @hash{qw(name title tags thumbhash)}; ( $_ = redis_decode($_) ) for ( $name, $title, $tags ); - ( remove_newlines($_) ) for ( $name, $title, $tags ); + ( $_ = trim_CRLF($_) ) for ( $name, $title, $tags ); # Backup all user-generated metadata, alongside the unique ID. my %arc = ( diff --git a/lib/LANraragi/Model/Plugins.pm b/lib/LANraragi/Model/Plugins.pm index e7220a681..9b7f8f200 100644 --- a/lib/LANraragi/Model/Plugins.pm +++ b/lib/LANraragi/Model/Plugins.pm @@ -11,7 +11,7 @@ use Mojo::JSON qw(decode_json encode_json); use Mojo::UserAgent; use Data::Dumper; -use LANraragi::Utils::Generic qw(remove_spaces remove_newlines); +use LANraragi::Utils::String qw(trim); use LANraragi::Utils::Database qw(set_tags set_title); use LANraragi::Utils::Archive qw(extract_thumbnail); use LANraragi::Utils::Logging qw(get_logger); @@ -20,7 +20,7 @@ use LANraragi::Utils::Tags qw(rewrite_tags split_tags_to_array); # Sub used by Auto-Plugin. sub exec_enabled_plugins_on_file { - my $id = shift; + my $id = shift; my $logger = get_logger( "Auto-Plugin", "lanraragi" ); $logger->info("Executing enabled metadata plugins on archive with id $id."); @@ -273,7 +273,7 @@ sub exec_metadata_plugin { if ( exists $newmetadata{title} ) { my $newtitle = $newmetadata{title}; - remove_spaces($newtitle); + $newtitle = trim($newtitle); $returnhash{title} = $newtitle; } return %returnhash; diff --git a/lib/LANraragi/Model/Search.pm b/lib/LANraragi/Model/Search.pm index 637cbe868..92acfcd2d 100644 --- a/lib/LANraragi/Model/Search.pm +++ b/lib/LANraragi/Model/Search.pm @@ -9,7 +9,8 @@ use Redis; use Storable qw/ nfreeze thaw /; use Sort::Naturally; -use LANraragi::Utils::Generic qw(split_workload_by_cpu remove_spaces); +use LANraragi::Utils::Generic qw(split_workload_by_cpu); +use LANraragi::Utils::String qw(trim); use LANraragi::Utils::Database qw(redis_decode redis_encode); use LANraragi::Utils::Logging qw(get_logger); @@ -22,7 +23,7 @@ sub do_search { my ( $filter, $category_id, $start, $sortkey, $sortorder, $newonly, $untaggedonly ) = @_; - my $redis = LANraragi::Model::Config->get_redis_search; + my $redis = LANraragi::Model::Config->get_redis_search; my $logger = get_logger( "Search Engine", "lanraragi" ); unless ( $redis->exists("LAST_JOB_TIME") ) { @@ -64,7 +65,7 @@ sub do_search { sub check_cache { my ( $cachekey, $cachekey_inv ) = @_; - my $redis = LANraragi::Model::Config->get_redis_search; + my $redis = LANraragi::Model::Config->get_redis_search; my $logger = get_logger( "Search Cache", "lanraragi" ); my @filtered = (); @@ -190,7 +191,7 @@ sub search_uncached { # If the tag has a namespace, We don't add a wildcard at the start of the tag to keep it intact. # Otherwise, we add a wildcard at the start to match all namespaces. my $indexkey = $tag =~ /:/ ? "INDEX_$tag*" : "INDEX_*$tag*"; - my @keys = $redis->keys($indexkey); + my @keys = $redis->keys($indexkey); # Get the list of IDs for each key foreach my $key (@keys) { @@ -202,7 +203,7 @@ sub search_uncached { # Append fuzzy title search my $namesearch = $isexact ? "$tag\x00*" : "*$tag*"; - my $scan = -1; + my $scan = -1; while ( $scan != 0 ) { # First iteration @@ -373,7 +374,7 @@ sub compute_search_filter { # Escape already present regex characters $logger->debug("Pre-escaped tag: $tag"); - remove_spaces($tag); + $tag = trim($tag); # Escape characters according to redis zscan rules $tag =~ s/([\[\]\^\\])/\\$1/g; @@ -405,10 +406,10 @@ sub sort_results { # (If no tag, defaults to "zzzz") my %tmpfilter = map { $_ => ( $redis->hget( $_, "tags" ) =~ m/.*${re}:(.*)(\,.*|$)/ ) ? $1 : "zzzz" } @filtered; - my @sorted = map { $_->[0] } # Map back to only having the ID - sort { ncmp( $a->[1], $b->[1] ) } # Sort by the tag - map { [ $_, lc( $tmpfilter{$_} ) ] } # Map to an array containing the ID and the lowercased tag - keys %tmpfilter; # List of IDs + my @sorted = map { $_->[0] } # Map back to only having the ID + sort { ncmp( $a->[1], $b->[1] ) } # Sort by the tag + map { [ $_, lc( $tmpfilter{$_} ) ] } # Map to an array containing the ID and the lowercased tag + keys %tmpfilter; # List of IDs if ($sortorder) { @sorted = reverse @sorted; diff --git a/lib/LANraragi/Model/Stats.pm b/lib/LANraragi/Model/Stats.pm index 9c52085d4..f3fad14ab 100644 --- a/lib/LANraragi/Model/Stats.pm +++ b/lib/LANraragi/Model/Stats.pm @@ -8,7 +8,8 @@ use Redis; use File::Find; use Mojo::JSON qw(encode_json); -use LANraragi::Utils::Generic qw(remove_spaces remove_newlines is_archive trim_url); +use LANraragi::Utils::Generic qw(is_archive); +use LANraragi::Utils::String qw(trim trim_CRLF trim_url); use LANraragi::Utils::Database qw(redis_decode redis_encode); use LANraragi::Utils::Logging qw(get_logger); @@ -38,7 +39,7 @@ sub get_archive_count { sub get_page_stat { my $redis = LANraragi::Model::Config->get_redis_config; - my $stat = $redis->get("LRR_TOTALPAGESTAT") || 0; + my $stat = $redis->get("LRR_TOTALPAGESTAT") || 0; $redis->quit(); return $stat; @@ -83,20 +84,19 @@ sub build_stat_hashes { my $rawtags = $redis->hget( $id, "tags" ); # Split tags by comma - my @tags = split( /,\s?/, redis_decode($rawtags) ); + my @tags = split( /,\s?/, redis_decode($rawtags) ); my $has_tags = 0; foreach my $t (@tags) { - remove_spaces($t); - remove_newlines($t); + $t = trim($t); + $t = trim_CRLF($t); # The following are basic and therefore don't count as "tagged" $has_tags = 1 unless $t =~ /(artist|parody|series|language|event|group|date_added|timestamp):.*/; # If the tag is a source: tag, add it to the URL index if ( $t =~ /source:(.*)/i ) { - my $url = $1; - trim_url($url); + my $url = trim_url($1); $logger->trace("Adding $url as an URL for $id"); $redistx->hset( "LRR_URLMAP", $url, $id ); # No need to encode the value, as URLs are already encoded by design } @@ -123,8 +123,8 @@ sub build_stat_hashes { # Decode and lowercase the title $title = lc( redis_decode($title) ); - remove_spaces($title); - remove_newlines($title); + $title = trim($title); + $title = trim_CRLF($title); $title = redis_encode($title); # The LRR_TITLES lexicographically sorted set contains both the title and the id under the form $title\x00$id. @@ -156,7 +156,7 @@ sub is_url_recorded { $logger->debug("Checking if url $url is in the url map."); # Trim last slash from url if it's present - trim_url($url); + $url = trim_url($url); if ( $redis->hexists( "LRR_URLMAP", $url ) ) { $id = $redis->hget( "LRR_URLMAP", $url ); @@ -169,11 +169,11 @@ sub is_url_recorded { sub build_tag_stats { my $minscore = shift; - my $logger = get_logger( "Tag Stats", "lanraragi" ); + my $logger = get_logger( "Tag Stats", "lanraragi" ); $logger->debug("Serving tag statistics with a minimum weight of $minscore"); # Login to Redis and grab the stats sorted set - my $redis = LANraragi::Model::Config->get_redis_search; + my $redis = LANraragi::Model::Config->get_redis_search; my %tagcloud = $redis->zrangebyscore( "LRR_STATS", $minscore, "+inf", "WITHSCORES" ); $redis->quit(); diff --git a/lib/LANraragi/Model/Upload.pm b/lib/LANraragi/Model/Upload.pm index 2a142f0f2..e69370939 100644 --- a/lib/LANraragi/Model/Upload.pm +++ b/lib/LANraragi/Model/Upload.pm @@ -13,7 +13,8 @@ use File::Copy qw(move); use LANraragi::Utils::Database qw(invalidate_cache compute_id); use LANraragi::Utils::Logging qw(get_logger); use LANraragi::Utils::Database qw(redis_encode); -use LANraragi::Utils::Generic qw(is_archive remove_spaces remove_newlines trim_url get_bytelength); +use LANraragi::Utils::Generic qw(is_archive get_bytelength); +use LANraragi::Utils::String qw(trim trim_CRLF trim_url); use LANraragi::Model::Config; use LANraragi::Model::Plugins; @@ -31,8 +32,8 @@ use LANraragi::Model::Category; # Returns a status value, the ID and title of the file, and a status message. sub handle_incoming_file { - my ( $tempfile, $catid, $tags ) = @_; - my ( $filename, $dirs, $suffix ) = fileparse( $tempfile, qr/\.[^.]*/ ); + my ( $tempfile, $catid, $tags ) = @_; + my ( $filename, $dirs, $suffix ) = fileparse( $tempfile, qr/\.[^.]*/ ); $filename = $filename . $suffix; my $logger = get_logger( "File Upload/Download", "lanraragi" ); @@ -57,7 +58,7 @@ sub handle_incoming_file { my $isdupe = $redis->exists($id) && -e $redis->hget( $id, "file" ); # Stop here if file is a dupe and replacement is turned off. - if ((-e $output_file || $isdupe) && !$replace_dupe) { + if ( ( -e $output_file || $isdupe ) && !$replace_dupe ) { # Trash temporary file unlink $tempfile; @@ -75,7 +76,7 @@ sub handle_incoming_file { # If we are replacing an existing one, just remove the old one first. if ($replace_dupe) { $logger->debug("Delete archive $id before replacing it."); - LANraragi::Utils::Database::delete_archive( $id ); + LANraragi::Utils::Database::delete_archive($id); } # Add the file to the database ourselves so Shinobu doesn't do it @@ -91,8 +92,8 @@ sub handle_incoming_file { my @tags = split( /,\s?/, $tags ); foreach my $t (@tags) { - remove_spaces($t); - remove_newlines($t); + $t = trim($t); + $t = trim_CRLF($t); # If the tag is a source: tag, add it to the URL index if ( $t =~ /source:(.*)/i ) { diff --git a/lib/LANraragi/Plugin/Metadata/Eze.pm b/lib/LANraragi/Plugin/Metadata/Eze.pm index d17a1768d..b22a08fa0 100644 --- a/lib/LANraragi/Plugin/Metadata/Eze.pm +++ b/lib/LANraragi/Plugin/Metadata/Eze.pm @@ -13,7 +13,7 @@ use Time::Local qw(timegm_modern); use LANraragi::Model::Plugins; use LANraragi::Utils::Database; use LANraragi::Utils::Logging qw(get_plugin_logger); -use LANraragi::Utils::Generic qw(remove_spaces); +use LANraragi::Utils::String qw(trim); use LANraragi::Utils::Archive qw(is_file_in_archive extract_file_from_archive); #Meta-information about your plugin. @@ -32,8 +32,8 @@ sub plugin_info { "\nB3RJTUUH4wYCFDYBnHlU6AAAAB1pVFh0Q29tbWVudAAAAAAAQ3JlYXRlZCB3aXRoIEdJTVBkLmUH\nAAAETUlEQVQ4y22UTWhTWRTHf/d9JHmNJLFpShMcKoRIqxXE4sKpjgthYLCLggU/wI1CUWRUxlmU\nWblw20WZMlJc1yKKKCjCdDdYuqgRiygq2mL8aJpmQot5uabv3XdnUftG0bu593AOv3M45/yvGBgY\n4OrVqwRBgG3bGIaBbduhDSClxPM8tNZMTEwwMTGB53lYloXWmkgkwqdPnygUCljZbJbW1lYqlQqG\nYYRBjuNw9+5dHj16RD6fJ51O09bWxt69e5mammJ5eZm1tTXi8Tiu6xKNRrlx4wZWNBqlXq8Tj8cx\nTRMhBJZlMT4+zuXLlxFCEIvFqFarBEFAKpXCcRzq9TrpdJparcbIyAiHDh1icXERyzAMhBB4nofv\n+5imiWmavHr1inQ6jeM4ZLNZDMMglUqxuLiIlBLXdfn48SNKKXp6eqhUKiQSCaxkMsna2hqe52Hb\nNsMdec3n8+Pn2+vpETt37qSlpYVyucz8/DzT09Ns3bqVYrEIgOM4RCIRrI1MiUQCz/P43vE8jxcv\nXqCUwvM8Zmdn2bJlC6lUitHRUdrb2zFNE9/3sd6/f4/jOLiuSzKZDCH1wV/EzMwM3d3dNN69o729\nnXK5jFKKPXv2sLS0RF9fHydOnMD3fZRSaK0xtNYEQYBpmtTr9RC4b98+LMsCwLZtHj9+TCwWI5/P\nI6Xk5MmTXLhwAaUUG3MA4M6dOzQaDd68eYOUkqHIZj0U2ay11mzfvp1du3YhhGBgYIDjx4/T3d1N\nvV4nCAKklCilcF2XZrOJlBIBcOnSJc6ePYsQgj9yBf1l//7OJcXPH1Y1wK/Ff8SfvT995R9d/SA8\nzyMaja5Xq7Xm1q1bLCwssLS09M1Atm3bFr67urq+8W8oRUqJlBJLCMHNmze5d+8e2Ww2DPyrsSxq\ntRqZTAattZibm6PZbHJFVoUQgtOxtAbwfR8A13WJxWIYANVqFd/36e/v/ypzIpEgCAKEEMzNzYXN\n34CN/FsSvu+jtSaTyeC67jrw4cOHdHZ2kslkQmCz2SQSiYT269evMU0zhF2RVaH1ejt932dlZYXh\n4eF14MLCArZtI6UMAb+1/qBPx9L6jNOmAY4dO/b/agBnnDb9e1un3vhQzp8/z/Xr19eBQgjevn3L\n1NTUd5WilKJQKGAYxje+lpYWrl27xuTk5PqKARSLRfr6+hgaGiKbzfLy5UvGx8dRSqGUwnEcDMNA\nKYUQIlRGNBplZmaGw4cPE4/HOXDgAMbs7Cy9vb1cvHiR+fl5Hjx4QC6XwzAMYrEYz549Y3p6mufP\nn4d6NU0Tx3GYnJzk6NGjNJtNduzYQUdHB+LL8mu1Gv39/WitGRsb4/79+3R1dbF7925yuVw4/Uaj\nwalTpzhy5AhjY2P4vs/BgwdJp9OYG7ByuUwmk6FUKgFw7tw5SqUSlUqFp0+fkkgk2LRpEysrKzx5\n8oTBwUG01ty+fZv9+/eTz+dZXV3lP31rAEu+yXjEAAAAAElFTkSuQmCC", parameters => [ { type => "bool", desc => "Save archive title" }, - { type => "bool", - desc => "Save the original title when available instead of the English or romanised title" + { type => "bool", + desc => "Save the original title when available instead of the English or romanised title" }, { type => "bool", desc => "Fetch additional timestamp (time posted) and uploader metadata" }, ] @@ -45,25 +45,25 @@ sub plugin_info { sub get_tags { shift; - my $lrr_info = shift; # Global info hash - my ($save_title, $origin_title, $additional_tags) = @_; # Plugin parameters + my $lrr_info = shift; # Global info hash + my ( $save_title, $origin_title, $additional_tags ) = @_; # Plugin parameters my $logger = get_plugin_logger(); my $path_in_archive = is_file_in_archive( $lrr_info->{file_path}, "info.json" ); - my ($name, $path, $suffix) = fileparse($lrr_info->{file_path}, qr/\.[^.]*/); + my ( $name, $path, $suffix ) = fileparse( $lrr_info->{file_path}, qr/\.[^.]*/ ); my $path_nearby_json = $path . $name . '.json'; my $filepath; my $delete_after_parse; - + #Extract info.json - if($path_in_archive) { + if ($path_in_archive) { $filepath = extract_file_from_archive( $lrr_info->{file_path}, $path_in_archive ); $logger->debug("Found file in archive at $filepath"); $delete_after_parse = 1; - } elsif (-e $path_nearby_json) { + } elsif ( -e $path_nearby_json ) { $filepath = $path_nearby_json; $logger->debug("Found file nearby at $filepath"); $delete_after_parse = 0; @@ -75,7 +75,7 @@ sub get_tags { my $stringjson = ""; open( my $fh, '<:encoding(UTF-8)', $filepath ) - or return ( error => "Could not open $filepath!" ); + or return ( error => "Could not open $filepath!" ); while ( my $row = <$fh> ) { chomp $row; @@ -88,9 +88,10 @@ sub get_tags { $logger->debug("Loaded the following JSON: $stringjson"); #Parse it - my ( $tags, $title ) = tags_from_eze_json($origin_title, $additional_tags, $hashjson); + my ( $tags, $title ) = tags_from_eze_json( $origin_title, $additional_tags, $hashjson ); + + if ($delete_after_parse) { - if ($delete_after_parse){ #Delete it unlink $filepath; } @@ -111,7 +112,7 @@ sub get_tags { #Goes through the JSON hash obtained from an info.json file and return the contained tags. sub tags_from_eze_json { - my ($origin_title, $additional_tags, $hash) = @_; + my ( $origin_title, $additional_tags, $hash ) = @_; my $return = ""; #Tags are in gallery_info -> tags -> one array per namespace @@ -120,11 +121,11 @@ sub tags_from_eze_json { # Titles returned by eze are in complete E-H notation. my $title = $hash->{"gallery_info"}->{"title"}; - if ($origin_title && $hash->{"gallery_info"}->{"title_original"} ) { + if ( $origin_title && $hash->{"gallery_info"}->{"title_original"} ) { $title = $hash->{"gallery_info"}->{"title_original"}; } - remove_spaces($title); + $title = trim($title); foreach my $namespace ( sort keys %$tags ) { @@ -139,23 +140,25 @@ sub tags_from_eze_json { } # Add source tag if possible - my $site = $hash->{"gallery_info"}->{"source"}->{"site"}; - my $gid = $hash->{"gallery_info"}->{"source"}->{"gid"}; - my $gtoken = $hash->{"gallery_info"}->{"source"}->{"token"}; - my $category = $hash->{"gallery_info"}->{"category"}; - my $uploader = $hash->{"gallery_info_full"}->{"uploader"}; + my $site = $hash->{"gallery_info"}->{"source"}->{"site"}; + my $gid = $hash->{"gallery_info"}->{"source"}->{"gid"}; + my $gtoken = $hash->{"gallery_info"}->{"source"}->{"token"}; + my $category = $hash->{"gallery_info"}->{"category"}; + my $uploader = $hash->{"gallery_info_full"}->{"uploader"}; my $timestamp = $hash->{"gallery_info_full"}->{"date_uploaded"}; - if ( $timestamp ) { + if ($timestamp) { + # convert microsecond to second $timestamp = $timestamp / 1000; } else { my $upload_date = $hash->{"gallery_info"}->{"upload_date"}; - my $time = timegm_modern($$upload_date[5],$$upload_date[4],$$upload_date[3],$$upload_date[2],$$upload_date[1]-1,$$upload_date[0]); + my $time = timegm_modern( $$upload_date[5], $$upload_date[4], $$upload_date[3], $$upload_date[2], $$upload_date[1] - 1, + $$upload_date[0] ); $timestamp = $time; } - if ( $category ) { + if ($category) { $return .= ", category:$category"; } diff --git a/lib/LANraragi/Plugin/Metadata/Fakku.pm b/lib/LANraragi/Plugin/Metadata/Fakku.pm index 8879650d8..019b58d2a 100644 --- a/lib/LANraragi/Plugin/Metadata/Fakku.pm +++ b/lib/LANraragi/Plugin/Metadata/Fakku.pm @@ -13,7 +13,7 @@ use Mojo::DOM; #You can also use the LRR Internal API when fitting. use LANraragi::Model::Plugins; use LANraragi::Utils::Logging qw(get_plugin_logger); -use LANraragi::Utils::Generic qw(remove_spaces remove_newlines); +use LANraragi::Utils::String qw(trim trim_CRLF); #Meta-information about your plugin. sub plugin_info { @@ -179,7 +179,7 @@ sub get_tags_from_fakku { my $metadata_parent = $tags_parent->parent->parent; my $title = $metadata_parent->at('h1')->text; - remove_spaces($title); + $title = trim($title); $logger->debug("Parsed title: $title"); my @tags = (); @@ -201,8 +201,8 @@ sub get_tags_from_fakku { ? $row[1]->at('a')->text : $row[1]->text; - remove_spaces($value); - remove_newlines($value); + $value = trim($value); + $value = trim_CRLF($value); $logger->debug("Parsed row: $namespace"); $logger->debug("Matching tag: $value"); @@ -223,8 +223,8 @@ sub get_tags_from_fakku { foreach my $link (@tag_links) { my $tag = $link->text; - remove_spaces($tag); - remove_newlines($tag); + $tag = trim($tag); + $tag = trim_CRLF($tag); unless ( $tag eq "+" || $tag eq "" ) { push( @tags, lc $tag ); } diff --git a/lib/LANraragi/Plugin/Metadata/Koushoku.pm b/lib/LANraragi/Plugin/Metadata/Koushoku.pm index b397511b0..06eb46570 100644 --- a/lib/LANraragi/Plugin/Metadata/Koushoku.pm +++ b/lib/LANraragi/Plugin/Metadata/Koushoku.pm @@ -13,7 +13,7 @@ use Mojo::DOM; #You can also use the LRR Internal API when fitting. use LANraragi::Model::Plugins; use LANraragi::Utils::Logging qw(get_plugin_logger); -use LANraragi::Utils::Generic qw(remove_spaces); +use LANraragi::Utils::String qw(trim); #Meta-information about your plugin. sub plugin_info { @@ -135,7 +135,7 @@ sub get_tags_from_ksk { # Title is the first h1 block my $title = $dom->at('h1')->text; - remove_spaces($title); + $title = trim($title); $logger->debug("Parsed title: $title"); # Get all the links with rel="tag" @@ -153,7 +153,7 @@ sub get_tags_from_ksk { # url-decode it before pushing my $tag = uri_unescape($2); - remove_spaces($tag); + $tag = trim($tag); if ( $1 eq "artists" ) { $tag = "artist:" . $tag; diff --git a/lib/LANraragi/Plugin/Metadata/RegexParse.pm b/lib/LANraragi/Plugin/Metadata/RegexParse.pm index 3d02eab6d..5dbdd55cf 100644 --- a/lib/LANraragi/Plugin/Metadata/RegexParse.pm +++ b/lib/LANraragi/Plugin/Metadata/RegexParse.pm @@ -13,7 +13,6 @@ use Scalar::Util qw(looks_like_number); use LANraragi::Model::Plugins; use LANraragi::Utils::Database qw(redis_encode redis_decode); use LANraragi::Utils::Logging qw(get_logger); -use LANraragi::Utils::Generic qw(remove_spaces); #Meta-information about your plugin. sub plugin_info { @@ -42,7 +41,7 @@ sub get_tags { my ($savetitle) = @_; # Plugin parameters my $logger = get_logger( "regexparse", "plugins" ); - my $file = $lrr_info->{file_path}; + my $file = $lrr_info->{file_path}; # lrr_info's file_path is taken straight from the filesystem, which might not be proper UTF-8. # Run a decode to make sure we can derive tags with the proper encoding. diff --git a/lib/LANraragi/Plugin/Scripts/SourceFinder.pm b/lib/LANraragi/Plugin/Scripts/SourceFinder.pm index 04d9fe406..5f953fc4f 100644 --- a/lib/LANraragi/Plugin/Scripts/SourceFinder.pm +++ b/lib/LANraragi/Plugin/Scripts/SourceFinder.pm @@ -7,7 +7,7 @@ no warnings 'uninitialized'; use Mojo::UserAgent; use LANraragi::Utils::Logging qw(get_plugin_logger); use LANraragi::Model::Stats; -use LANraragi::Utils::Generic qw(trim_url); +use LANraragi::Utils::String qw(trim_url); #Meta-information about your plugin. sub plugin_info { diff --git a/lib/LANraragi/Utils/Database.pm b/lib/LANraragi/Utils/Database.pm index d6cac9eba..32897cb84 100644 --- a/lib/LANraragi/Utils/Database.pm +++ b/lib/LANraragi/Utils/Database.pm @@ -16,7 +16,8 @@ use Redis; use Cwd; use Unicode::Normalize; -use LANraragi::Utils::Generic qw(flat remove_spaces remove_newlines trim_url); +use LANraragi::Utils::Generic qw(flat); +use LANraragi::Utils::String qw(trim trim_CRLF trim_url); use LANraragi::Utils::Tags qw(unflat_tagrules tags_rules_to_array restore_CRLF); use LANraragi::Utils::Archive qw(get_filelist); use LANraragi::Utils::Logging qw(get_logger); @@ -59,7 +60,7 @@ sub add_archive_to_redis ( $id, $file, $redis ) { sub change_archive_id ( $old_id, $new_id ) { my $logger = get_logger( "Archive", "lanraragi" ); - my $redis = LANraragi::Model::Config->get_redis; + my $redis = LANraragi::Model::Config->get_redis; $logger->debug("Changing ID $old_id to $new_id"); @@ -172,7 +173,8 @@ sub build_json ( $id, %hash ) { # It's not a new archive, but it might have never been clicked on yet, # so grab the value for $isnew stored in redis. - my ( $name, $title, $tags, $file, $isnew, $progress, $pagecount, $lastreadtime) = @hash{qw(name title tags file isnew progress pagecount lastreadtime)}; + my ( $name, $title, $tags, $file, $isnew, $progress, $pagecount, $lastreadtime ) = + @hash{qw(name title tags file isnew progress pagecount lastreadtime)}; # Return undef if the file doesn't exist. return unless ( defined($file) && -e $file ); @@ -186,13 +188,13 @@ sub build_json ( $id, %hash ) { } my $arcdata = { - arcid => $id, - title => $title, - tags => $tags, - isnew => $isnew ? $isnew : "false", - extension => lc( ( split( /\./, $file ) )[-1] ), - progress => $progress ? int($progress) : 0, - pagecount => $pagecount ? int($pagecount) : 0, + arcid => $id, + title => $title, + tags => $tags, + isnew => $isnew ? $isnew : "false", + extension => lc( ( split( /\./, $file ) )[-1] ), + progress => $progress ? int($progress) : 0, + pagecount => $pagecount ? int($pagecount) : 0, lastreadtime => $lastreadtime ? int($lastreadtime) : 0 }; @@ -208,8 +210,8 @@ sub delete_archive($id) { $oldtags = redis_decode($oldtags); my $oldtitle = lc( redis_decode( $redis->hget( $id, "title" ) ) ); - remove_spaces($oldtitle); - remove_newlines($oldtitle); + $oldtitle = trim($oldtitle); + $oldtitle = trim_CRLF($oldtitle); $oldtitle = redis_encode($oldtitle); $redis->del($id); @@ -278,7 +280,7 @@ sub clean_database { # Get the filemap for ID checks later down the line my @filemapids = $redis_config->exists("LRR_FILEMAP") ? $redis_config->hvals("LRR_FILEMAP") : (); - my %filemap = map { $_ => 1 } @filemapids; + my %filemap = map { $_ => 1 } @filemapids; #40-character long keys only => Archive IDs my @keys = $redis->keys('????????????????????????????????????????'); @@ -346,8 +348,8 @@ sub set_title ( $id, $newtitle ) { # Remove old title from search set if ( $redis->hexists( $id, "title" ) ) { my $oldtitle = lc( redis_decode( $redis->hget( $id, "title" ) ) ); - remove_spaces($oldtitle); - remove_newlines($oldtitle); + $oldtitle = trim($oldtitle); + $oldtitle = trim_CRLF($oldtitle); $oldtitle = redis_encode($oldtitle); $redis_search->zrem( "LRR_TITLES", "$oldtitle\0$id" ); } @@ -357,8 +359,8 @@ sub set_title ( $id, $newtitle ) { # Set title/ID key in search set $newtitle = lc($newtitle); - remove_spaces($newtitle); - remove_newlines($newtitle); + $newtitle = trim($newtitle); + $newtitle = trim_CRLF($newtitle); $newtitle = redis_encode($newtitle); $redis_search->zadd( "LRR_TITLES", 0, "$newtitle\0$id" ); } @@ -370,7 +372,7 @@ sub set_title ( $id, $newtitle ) { # Set $append to 1 if you want to append the tags instead of replacing them. sub set_tags ( $id, $newtags, $append = 0 ) { - my $redis = LANraragi::Model::Config->get_redis; + my $redis = LANraragi::Model::Config->get_redis; my $oldtags = $redis->hget( $id, "tags" ); $oldtags = redis_decode($oldtags); @@ -380,7 +382,7 @@ sub set_tags ( $id, $newtags, $append = 0 ) { unless ( length $newtags ) { return; } if ($oldtags) { - remove_spaces($oldtags); + $oldtags = trim($oldtags); if ( $oldtags ne "" ) { $newtags = $oldtags . "," . $newtags; @@ -433,8 +435,7 @@ sub update_indexes ( $id, $oldtags, $newtags ) { foreach my $tag (@oldtags) { if ( $tag =~ /source:(.*)/i ) { - my $url = $1; - trim_url($url); + my $url = trim_url($1); $redis->hdel( "LRR_URLMAP", $url ); } @@ -449,8 +450,7 @@ sub update_indexes ( $id, $oldtags, $newtags ) { # If the tag is a source: tag, add it to the URL index if ( $tag =~ /source:(.*)/i ) { - my $url = $1; - trim_url($url); + my $url = trim_url($1); $redis->hset( "LRR_URLMAP", $url, $id ); } @@ -533,7 +533,7 @@ sub save_computed_tagrules($tagrules) { $redis->del("LRR_TAGRULES"); if (@$tagrules) { - my @flat = reverse flat(@$tagrules); + my @flat = reverse flat(@$tagrules); my @encoded_flat = map { redis_encode($_) } @flat; $redis->lpush( "LRR_TAGRULES", @encoded_flat ); } @@ -549,7 +549,7 @@ sub get_computed_tagrules { if ( $redis->exists("LRR_TAGRULES") ) { my @flattened_rules = $redis->lrange( "LRR_TAGRULES", 0, -1 ); - my @decoded_rules = map { redis_decode($_) } @flattened_rules; + my @decoded_rules = map { redis_decode($_) } @flattened_rules; @tagrules = unflat_tagrules( \@decoded_rules ); } else { @tagrules = tags_rules_to_array( restore_CRLF( LANraragi::Model::Config->get_tagrules ) ); @@ -574,12 +574,12 @@ sub get_tankoubons_by_file($arcid) { return (); } - my @tanks = $redis->keys('TANK_??????????'); + my @tanks = $redis->keys('TANK_??????????'); - foreach my $key (sort @tanks) { + foreach my $key ( sort @tanks ) { - if ($redis->zscore($key, $arcid)) { - push( @tankoubons, $key) + if ( $redis->zscore( $key, $arcid ) ) { + push( @tankoubons, $key ); } } diff --git a/lib/LANraragi/Utils/Generic.pm b/lib/LANraragi/Utils/Generic.pm index 13b4e6458..ef858564c 100644 --- a/lib/LANraragi/Utils/Generic.pm +++ b/lib/LANraragi/Utils/Generic.pm @@ -15,44 +15,14 @@ use Proc::Simple; use Sys::CpuAffinity; use LANraragi::Utils::TempFolder qw(get_temp); +use LANraragi::Utils::String qw(trim); use LANraragi::Utils::Logging qw(get_logger); # Generic Utility Functions. use Exporter 'import'; -our @EXPORT_OK = - qw(remove_spaces remove_newlines trim_url is_image is_archive render_api_response get_tag_with_namespace shasum start_shinobu +our @EXPORT_OK = qw(is_image is_archive render_api_response get_tag_with_namespace shasum start_shinobu split_workload_by_cpu start_minion get_css_list generate_themes_header flat get_bytelength array_difference); -# Remove spaces before and after a word -sub remove_spaces { - if ( $_[0] ) { - $_[0] =~ s/^\s+|\s+$//g; - } -} - -# Remove all newlines in a string -sub remove_newlines { - if ( $_[0] ) { - $_[0] =~ s/\R//g; - } -} - -# Fixes up a URL string for use in the DL system. -sub trim_url { - - remove_spaces( $_[0] ); - - # Remove scheme, www. and query parameters if present. Other subdomains are not removed - if ( $_[0] =~ /https?:\/\/(www\.)?([^\?]*)\??.*/gm ) { - $_[0] = $2; - } - - my $char = chop $_[0]; - if ( $char ne "/" ) { - $_[0] .= $char; - } -} - # Checks if the provided file is an image. # Uses non-capturing groups (?:) to avoid modifying the incoming argument. sub is_image { @@ -72,10 +42,10 @@ sub render_api_response { $mojo->render( json => { - operation => $operation, - error => $failed ? $errormessage : "", - success => $failed ? 0 : 1, - successMessage => $failed ? "" : $successMessage, + operation => $operation, + error => $failed ? $errormessage : "", + success => $failed ? 0 : 1, + successMessage => $failed ? "" : $successMessage, }, status => $failed ? 400 : 200 ); @@ -88,8 +58,8 @@ sub get_tag_with_namespace { foreach my $tag (@values) { my ( $namecheck, $value ) = split( ':', $tag ); - remove_spaces($namecheck); - remove_spaces($value); + $namecheck = trim($namecheck); + $value = trim($value); if ( $namecheck eq $namespace ) { return $value; @@ -119,7 +89,7 @@ sub split_workload_by_cpu { # Start a Minion worker if there aren't any available. sub start_minion { - my $mojo = shift; + my $mojo = shift; my $logger = get_logger( "Minion", "minion" ); my $numcpus = Sys::CpuAffinity::getNumCpus(); @@ -150,8 +120,8 @@ sub start_minion { } sub _spawn { - my ( $job, $pid ) = @_; - my ( $id, $task ) = ( $job->id, $job->task ); + my ( $job, $pid ) = @_; + my ( $id, $task ) = ( $job->id, $job->task ); my $logger = get_logger( "Minion Worker", "minion" ); $job->app->log->debug(qq{Process $pid is performing job "$id" with task "$task"}); } @@ -272,15 +242,15 @@ sub array_difference { my %seen; my @difference; - + # Add all elements from array1 to the hash $seen{$_} = 1 for @$array1; - + # Check elements in array2 and add the ones not seen in array1 to the difference array foreach my $element (@$array2) { push @difference, $element unless $seen{$element}; } - + return @difference; } diff --git a/lib/LANraragi/Utils/Minion.pm b/lib/LANraragi/Utils/Minion.pm index 7ddf57fe6..b9056cd56 100644 --- a/lib/LANraragi/Utils/Minion.pm +++ b/lib/LANraragi/Utils/Minion.pm @@ -11,7 +11,8 @@ use LANraragi::Utils::Logging qw(get_logger); use LANraragi::Utils::Database qw(redis_decode); use LANraragi::Utils::Archive qw(extract_thumbnail extract_archive); use LANraragi::Utils::Plugins qw(get_downloader_for_url get_plugin get_plugin_parameters use_plugin); -use LANraragi::Utils::Generic qw(trim_url split_workload_by_cpu); +use LANraragi::Utils::Generic qw(split_workload_by_cpu); +use LANraragi::Utils::String qw(trim_url); use LANraragi::Utils::TempFolder qw(get_temp); use LANraragi::Model::Upload; @@ -30,7 +31,7 @@ sub add_tasks { my $logger = get_logger( "Minion", "minion" ); # Non-cover thumbnails are rendered in low quality by default. - my $use_hq = $page eq 0 || LANraragi::Model::Config->get_hqthumbpages; + my $use_hq = $page eq 0 || LANraragi::Model::Config->get_hqthumbpages; my $thumbname = ""; eval { $thumbname = extract_thumbnail( $thumbdir, $id, $page, $use_hq ); }; @@ -72,8 +73,8 @@ sub add_tasks { sub { foreach my $id (@$_) { - my $use_jxl = LANraragi::Model::Config->get_jxlthumbpages; - my $format = $use_jxl ? 'jxl' : 'jpg'; + my $use_jxl = LANraragi::Model::Config->get_jxlthumbpages; + my $format = $use_jxl ? 'jxl' : 'jpg'; my $subfolder = substr( $id, 0, 2 ); my $thumbname = "$thumbdir/$subfolder/$id.$format"; @@ -150,7 +151,7 @@ sub add_tasks { my ( $job, @args ) = @_; my ( $url, $catid ) = @args; - my $ua = Mojo::UserAgent->new; + my $ua = Mojo::UserAgent->new; my $logger = get_logger( "Minion", "minion" ); $logger->info("Downloading url $url..."); diff --git a/lib/LANraragi/Utils/String.pm b/lib/LANraragi/Utils/String.pm index f49e6c11a..f111723b8 100644 --- a/lib/LANraragi/Utils/String.pm +++ b/lib/LANraragi/Utils/String.pm @@ -9,11 +9,16 @@ use feature qw(signatures); use String::Similarity; +use Exporter 'import'; +our @EXPORT_OK = qw(clean_title trim trim_CRLF trim_url most_similar); + # Remove "junk" from titles, turning something like "(c12) [poop (butt)] hardcore handholding [monogolian] [recensored]" into "hardcore handholding" sub clean_title($title) { $title = trim($title); + # Remove leading "(c12)" $title =~ s/^\([^)]*\)?\s?//g; + # Remove leading "[poop (butt)]" $title =~ s/^\[[^]]*\]?\s?//g; @@ -23,26 +28,50 @@ sub clean_title($title) { return $title; } +# Remove spaces before and after a word sub trim($s) { $s =~ s/^\s+|\s+$//g; - return $s + return $s; +} + +# Remove all newlines in a string +sub trim_CRLF($s) { + $s =~ s/\R//g; + return $s; +} + +# Fixes up a URL string for use in the DL system. +sub trim_url($url) { + + $url = trim($url); + + # Remove scheme, www. and query parameters if present. Other subdomains are not removed + if ( $url =~ /https?:\/\/(www\.)?([^\?]*)\??.*/gm ) { + $url = $2; + } + + my $char = chop $url; + if ( $char ne "/" ) { + $url .= $char; + } + return $url; } # Finds the index of the string in @values that is most similar to $tested_string. Returns undef if @values is empty. # If multiple rows score "first place", the first one is returned -sub most_similar($tested_string, @values) { - if (!@values) { +sub most_similar ( $tested_string, @values ) { + if ( !@values ) { return; } my $best_similarity = 0.0; - my $best_index = undef; + my $best_index = undef; - while (my ($index, $elem) = each @values) { - my $similarity = similarity($tested_string, $elem); - if (!defined($best_index) || $similarity > $best_similarity) { + while ( my ( $index, $elem ) = each @values ) { + my $similarity = similarity( $tested_string, $elem ); + if ( !defined($best_index) || $similarity > $best_similarity ) { $best_similarity = $similarity; - $best_index = $index; + $best_index = $index; } } return $best_index; diff --git a/lib/LANraragi/Utils/Tags.pm b/lib/LANraragi/Utils/Tags.pm index 8765963cb..602b98377 100644 --- a/lib/LANraragi/Utils/Tags.pm +++ b/lib/LANraragi/Utils/Tags.pm @@ -6,12 +6,11 @@ use utf8; use feature "switch"; no warnings 'experimental'; -use LANraragi::Utils::Generic qw(remove_spaces remove_newlines); +use LANraragi::Utils::String qw(trim trim_CRLF); # Functions related to the Tag system. use Exporter 'import'; -our @EXPORT_OK = - qw( unflat_tagrules replace_CRLF restore_CRLF tags_rules_to_array rewrite_tags split_tags_to_array ); +our @EXPORT_OK = qw( unflat_tagrules replace_CRLF restore_CRLF tags_rules_to_array rewrite_tags split_tags_to_array ); sub is_null_or_empty { return !length(shift); @@ -19,63 +18,63 @@ sub is_null_or_empty { sub replace_CRLF { my ($val) = @_; - $val =~ s/\x{d}\x{a}/;/g if ( $val ); + $val =~ s/\x{d}\x{a}/;/g if ($val); return $val; } sub restore_CRLF { my ($val) = @_; - $val =~ s/;/\x{d}\x{a}/g if ( $val ); + $val =~ s/;/\x{d}\x{a}/g if ($val); return $val; } sub unflat_tagrules { - my ( $flattened_rules ) = @_; + my ($flattened_rules) = @_; my @tagrules = (); - while (@{$flattened_rules || []}) { - push(@tagrules, [ splice(@$flattened_rules, 0, 3) ]); + while ( @{ $flattened_rules || [] } ) { + push( @tagrules, [ splice( @$flattened_rules, 0, 3 ) ] ); } return @tagrules; } sub split_tags_to_array { - my ( $tags_string ) = @_; - my @tags = split( ',', $tags_string ); + my ($tags_string) = @_; + my @tags = split( ',', $tags_string ); foreach my $tags (@tags) { - remove_spaces($tags); - remove_newlines($tags); + $tags = trim($tags); + $tags = trim_CRLF($tags); } return @tags; } sub tags_rules_to_array { - my ( $text_rules ) = @_; + my ($text_rules) = @_; my @rules; my @lines = split( '\n', $text_rules ); - foreach my $line ( @lines ) { + foreach my $line (@lines) { my ( $match, $value ) = split( '->', $line ); - remove_spaces($match); - remove_spaces($value); - if (!is_null_or_empty($match)) { + $match = trim($match); + $value = trim($value); + if ( !is_null_or_empty($match) ) { my $rule_type; if ( !$value && $match =~ m/^-.*:\*$/ ) { $rule_type = 'remove_ns'; - $match = substr ($match, 1, length($match)-3); + $match = substr( $match, 1, length($match) - 3 ); } elsif ( !$value && $match =~ m/^-/ ) { $rule_type = 'remove'; - $match = substr ($match, 1); + $match = substr( $match, 1 ); } elsif ( !$value && $match =~ m/^~/ ) { $rule_type = 'strip_ns'; - $match = substr ($match, 1); + $match = substr( $match, 1 ); } elsif ( $match =~ m/:\*$/ && $value =~ m/:\*$/ ) { $rule_type = 'replace_ns'; - $match = substr ($match, 0, length($match)-2); - $value = substr ($value, 0, length($value)-2); + $match = substr( $match, 0, length($match) - 2 ); + $value = substr( $value, 0, length($value) - 2 ); } elsif ( !$value ) { - $rule_type = 'remove'; # blacklist mode + $rule_type = 'remove'; # blacklist mode } else { - $rule_type = 'replace' + $rule_type = 'replace'; } push( @rules, [ $rule_type, lc $match, $value || '' ] ) if ($rule_type); @@ -89,9 +88,9 @@ sub rewrite_tags { return @$tags if ( !@$rules ); my @parsed_tags; - foreach my $tag ( @$tags ) { - my $new_tag = apply_rules($tag, $rules); - push(@parsed_tags, $new_tag) if ($new_tag); + foreach my $tag (@$tags) { + my $new_tag = apply_rules( $tag, $rules ); + push( @parsed_tags, $new_tag ) if ($new_tag); } return @parsed_tags; } @@ -99,10 +98,10 @@ sub rewrite_tags { sub apply_rules { my ( $tag, $rules ) = @_; - foreach my $rule ( @$rules ) { + foreach my $rule (@$rules) { my $match = $rule->[1]; my $value = $rule->[2]; - given($rule->[0]) { + given ( $rule->[0] ) { when ('remove') { return if ( lc $tag eq $match ); } when ('remove_ns') { return if ( $tag =~ m/^$match:/i ); } when ('replace_ns') { $tag =~ s/^\Q$match:/$value\:/i; } diff --git a/tests/LANraragi/Utils/String.t b/tests/LANraragi/Utils/String.t index ee25e1f10..e4a470ae7 100644 --- a/tests/LANraragi/Utils/String.t +++ b/tests/LANraragi/Utils/String.t @@ -8,46 +8,45 @@ use Test::Deep; BEGIN { use_ok('LANraragi::Utils::String'); } - note('testing trim...'); { - my $input = ""; + my $input = ""; my $expected = ""; - my $result = LANraragi::Utils::String::trim($input); + my $result = LANraragi::Utils::String::trim($input); - is($result, $expected, "Empty string should result in empty string"); + is( $result, $expected, "Empty string should result in empty string" ); } { - my $input = "already trimmed"; + my $input = "already trimmed"; my $expected = "already trimmed"; - my $result = LANraragi::Utils::String::trim($input); + my $result = LANraragi::Utils::String::trim($input); - is($result, $expected, "Pre-trimmed should do nothing"); + is( $result, $expected, "Pre-trimmed should do nothing" ); } { - my $input = " trim everything "; + my $input = " trim everything "; my $expected = "trim everything"; - my $result = LANraragi::Utils::String::trim($input); + my $result = LANraragi::Utils::String::trim($input); - is($result, $expected, "Trim should trim"); + is( $result, $expected, "Trim should trim" ); + is( $input, " trim everything ", "Trim doesn't modify the input variable" ); } - note('testing title cleanup...'); { - my $input = "(C83) [Tetsubou Shounen (Natsushi)] So hold my hand one more time [English] [Yuri-ism Project]"; + my $input = "(C83) [Tetsubou Shounen (Natsushi)] So hold my hand one more time [English] [Yuri-ism Project]"; my $expected = "So hold my hand one more time"; - my $result = LANraragi::Utils::String::clean_title($input); + my $result = LANraragi::Utils::String::clean_title($input); - is($result, $expected, "Remove leading/trailing junk"); + is( $result, $expected, "Remove leading/trailing junk" ); } note('testing string similarity detection...'); { - is(LANraragi::Utils::String::most_similar("orange", ("door hinge", "sporange")), 1, "Simple case"); - is(LANraragi::Utils::String::most_similar("orange", ()), undef, "Empty set"); + is( LANraragi::Utils::String::most_similar( "orange", ( "door hinge", "sporange" ) ), 1, "Simple case" ); + is( LANraragi::Utils::String::most_similar( "orange", () ), undef, "Empty set" ); } done_testing();