Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(#501): enables to copy file from cluster into local container #725

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 69 additions & 2 deletions client/gefyra/api/run.py
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First of all thank you veeeeery much for this PR! This will reduce the number of things which otherwise need to be done in some wrapper script!

pls allow me to add some general questions/comments:

  • re parameter name « —file-from »: what would be the parameter name/syntax for future features like « copy all mounted files » (no more need to list out all the different dirs; or parameter name/syntax for the future feature « live file/folder sync between pod and container », which is needed e.g. to handle externally rotated cert files or for shared mounts between pod containers. - For now the background of my question is just to make sure that there won’t be any parameter name/syntax conflict for those future features.
  • by what is tar file size constrained? (in our case we are talking about tiny files, so we should be fine.)
  • does it make sense to allow batching of these tar runs? (only if they have significant overhead) - in our case we have to copy about 10 files/paths for a single container
  • can we make destination file/dir param optional? usually it will be same as source file/dir ?

If I can help with more details, I’d more than happy to do so. Please feel free to reach out to me on email or phone any time.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree to most of the mentioned points, however, cannot answer all, especially the first point.

by what is tar file size constrained? (in our case we are talking about tiny files, so we should be fine.)

@SteinRobert brought up the same point. I will see how this can be implemented. We thought that a limit of around 50MB max might be good. I personally only transfer files of some KB, thus I don't have any feeling for a reasonable limit for other use cases.

does it make sense to allow batching of these tar runs?

I guess. I will check it out. This would also reduce the count of connections etc and might play nice together with the transfer limit.

can we make destination file/dir param optional? usually it will be same as source file/dir ?

Yes.

Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import os
import sys
from threading import Thread, Event
from typing import Dict, List, Optional, TYPE_CHECKING
from gefyra.cluster.utils import retrieve_pod_and_container
from typing import Dict, List, Optional, TYPE_CHECKING, Tuple
from gefyra.cli.utils import FileFromArgument
from gefyra.cluster.utils import get_file_from_pod_container, retrieve_pod_and_container

if TYPE_CHECKING:
from docker.models.containers import Container
Expand All @@ -30,6 +31,62 @@ def check_input():
stop_thread.set()


def copy_files_or_directories_from_cluster_to_local_container(
config,
namespace: str,
local_container_name: str,
file_from: Tuple[FileFromArgument],
):
from kubernetes.client import ApiException

for file_from_argument in file_from:
try:
file_from_pod, file_from_container = retrieve_pod_and_container(
file_from_argument.workload, namespace=namespace, config=config
)
except ApiException as e:
logger.error(
f"Cannot copy file/directory from pod: {e.reason} ({e.status})."
)
continue

logger.debug(
f"Copying file/directory from {file_from_pod}/{file_from_container}"
)

try:
tar_buffer = get_file_from_pod_container(
config,
file_from_pod,
namespace,
file_from_container,
file_from_argument.source,
)
except (RuntimeError, ApiException) as e:
logger.info(
f"Cannot copy file/directory from pod:"
f" --file-from {str(file_from_argument.workload)}:"
f"{file_from_argument.source}:{file_from_argument.destination}"
)
logger.debug(e)
continue

try:
client = config.DOCKER
client.containers.get(local_container_name).put_archive(
file_from_argument.destination,
tar_buffer.read(),
)
except Exception as e:
logger.info(
f"Cannot copy file/directory into container:"
f" --file-from {str(file_from_argument.workload)}:"
f"{file_from_argument.source}:{file_from_argument.destination}"
)
logger.debug(e)
continue


@stopwatch
def run(
image: str,
Expand All @@ -43,6 +100,7 @@ def run(
namespace: str = "",
env: Optional[List] = None,
env_from: str = "",
file_from: Optional[Tuple[FileFromArgument]] = None,
pull: str = "missing",
platform: str = "linux/amd64",
) -> bool:
Expand Down Expand Up @@ -100,6 +158,7 @@ def run(
except ApiException as e:
logger.error(f"Cannot copy environment from Pod: {e.reason} ({e.status}).")
return False

if env:
env_overrides = generate_env_dict_from_strings(env)
env_dict.update(env_overrides)
Expand Down Expand Up @@ -131,6 +190,14 @@ def run(
else:
raise RuntimeError(e.explanation)

if file_from:
copy_files_or_directories_from_cluster_to_local_container(
config,
namespace=namespace,
local_container_name=name,
file_from=file_from,
)

logger.info(
f"Container image '{', '.join(container.image.tags)}' started with name"
f" '{container.name}' in Kubernetes namespace '{namespace}' (from {ns_source})"
Expand Down
11 changes: 11 additions & 0 deletions client/gefyra/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
OptionEatAll,
check_connection_name,
parse_env,
parse_file_from,
parse_ip_port_map,
parse_workload,
)
Expand Down Expand Up @@ -40,6 +41,13 @@
type=str,
callback=parse_workload,
)
@click.option(
"--file-from",
help="Copy the file from the container in the notation 'Pod/Container'",
type=str,
callback=parse_file_from,
multiple=True,
)
@click.option(
"-v",
"--volume",
Expand Down Expand Up @@ -102,6 +110,7 @@ def run(
auto_remove,
expose,
env_from,
file_from,
volume,
env,
namespace,
Expand All @@ -116,12 +125,14 @@ def run(

if command:
command = ast.literal_eval(command)[0]

api.run(
image=image,
name=name,
command=command,
namespace=namespace,
env_from=env_from,
file_from=file_from,
env=env,
ports=expose,
auto_remove=auto_remove,
Expand Down
37 changes: 37 additions & 0 deletions client/gefyra/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import click
from click import ClickException
from typing import NamedTuple


def standard_error_handler(func):
Expand Down Expand Up @@ -237,6 +238,42 @@ def parse_workload(ctx, param, workload: str) -> str:
return workload


class FileFromArgument(NamedTuple):
workload: str
source: str
destination: str = "/"


def parse_file_from(ctx, param, file_from_arguments) -> Tuple[FileFromArgument]:
if not file_from_arguments:
return None

MSG = (
"Invalid argument format. Please provide the file "
"in the format 'type/name:source:destination' or "
"'type/name/container-name:source:destination'."
Copy link

@crkurz crkurz Oct 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make destination optional? (I would suspect in most of the cases users will want the file to land in the same place it was copied from.)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree. Good point.

)

file_from = []
for file_from_argument in file_from_arguments:
split_argument = file_from_argument.split(":")
if len(split_argument) != 3:
raise ValueError(MSG)

workload, source, destination = split_argument
if "/" not in workload:
raise ValueError(MSG)

file_from_argument_parsed = FileFromArgument(
workload=workload,
source=source,
destination=destination,
)
file_from.append(file_from_argument_parsed)

return tuple(file_from)


def check_connection_name(ctx, param, selected: Optional[str] = None) -> str:
from gefyra import api

Expand Down
65 changes: 65 additions & 0 deletions client/gefyra/cluster/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from time import sleep
from typing import Tuple, TYPE_CHECKING
from gefyra.api.utils import get_workload_type
import io

if TYPE_CHECKING:
from kubernetes.client.models import V1Pod
Expand Down Expand Up @@ -52,6 +53,70 @@ def get_env_from_pod_container(
)


def get_file_from_pod_container(
config: ClientConfiguration,
pod_name: str,
namespace: str,
container_name: str,
source: str,
) -> io.BytesIO:
from kubernetes.client import ApiException
from kubernetes.stream import stream

retries = 10
counter = 0
interval = 1
while counter < retries:
try:
import os

exec_command = [
"/bin/sh",
"-c",
f"cd {os.path.dirname(source)} && tar cf - {os.path.basename(source)}",
]

resp = stream(
config.K8S_CORE_API.connect_get_namespaced_pod_exec,
pod_name,
namespace,
container=container_name,
command=exec_command,
stderr=True,
stdin=False,
stdout=True,
tty=False,
_preload_content=False,
)

tar_buffer = io.BytesIO()
while resp.is_open():
resp.update(timeout=1)
if resp.peek_stdout():
tar_buffer.write(resp.read_stdout().encode("utf-8"))
if resp.peek_stderr():
logger.debug("Error:", resp.read_stderr())

resp.close()

tar_buffer.seek(0)
return tar_buffer
except ApiException as e:
if "500 Internal Server Error" in e.reason:
sleep(interval)
counter += 1
logger.debug(
f"Failed to copy file from pod {pod_name} in namespace {namespace} on"
f" try {counter}."
)
else:
raise e
raise RuntimeError(
f"Failed to copy file from pod {pod_name} in namespace {namespace} after"
f" {retries} tries."
)


def get_container(pod, container_name: str):
for container in pod.spec.containers:
if container.name == container_name:
Expand Down
Loading