Skip to content

Commit

Permalink
Remove redundant compression step
Browse files Browse the repository at this point in the history
  • Loading branch information
caufieldjh committed Sep 24, 2024
1 parent b288a21 commit fdd8775
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 19 deletions.
9 changes: 1 addition & 8 deletions src/kg_bioportal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,22 +209,15 @@ def transform(input_dir, output_dir, compress) -> None:
"--input_dir", "-i", default="data/transformed", type=click.Path(exists=True)
)
@click.option("--output_dir", "-o", default="data/merged")
@click.option(
"--compress",
"-c",
is_flag=True,
default=True,
help="If true, compresses the output nodes and edges to tar.gz. Defaults to True.",
)
def merge(input_dir, output_dir, compress) -> None:
"""Merges all edges and nodes in the input directory to a single KGX graph.
Compresses all nodes and edges to tar.gz.
Yields one log file: merged_graph_stats.yaml.
Args:
input_dir: A string pointing to the directory to import data from.
output_dir: A string pointing to the directory to output data to.
compress: If true, compresses the output nodes and edges to tar.gz. Defaults to True.
Returns:
None.
Expand Down
14 changes: 3 additions & 11 deletions src/kg_bioportal/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(

return None

def merge_all(self, compress: bool) -> None:
def merge_all(self) -> None:
"""Merges all ontologies in the input directory to a single graph.
Yields one log file: merged_graph_stats.yaml.
Expand Down Expand Up @@ -97,7 +97,7 @@ def merge_all(self, compress: bool) -> None:

for filepath in filepaths:
ontology_name = filepath[0]
success, nodecount, edgecount = self.merge(nodelist, edgelist, compress)
success, nodecount, edgecount = self.merge(nodelist, edgelist)
if not success:
logging.error("Error in merging.")
status = False
Expand All @@ -108,7 +108,7 @@ def merge_all(self, compress: bool) -> None:
return None

def merge(
self, nodelist: List[str], edgelist: List[str], compress: bool
self, nodelist: List[str], edgelist: List[str]
) -> Tuple[bool, int, int]:
"""Merge graph files using cat_merge.
Expand Down Expand Up @@ -139,14 +139,6 @@ def merge(
qc_report=True,
)

if compress:
logging.info("Compressing merged graph.")
with tarfile.open(
os.path.join(self.output_dir, "merged_graph.tar.gz"), "w:gz"
) as tar:
tar.add(os.path.join(self.output_dir, "kg_bioportal_nodes.tsv"))
tar.add(os.path.join(self.output_dir, "kg_bioportal_edges.tsv"))

return status, nodecount, edgecount

def decompress(
Expand Down

0 comments on commit fdd8775

Please sign in to comment.