Skip to content

Commit

Permalink
Experimental TinkerPop 4.0 support (#704)
Browse files Browse the repository at this point in the history
* Initial TinkerPop 4.0 support

* update changelog
  • Loading branch information
michaelnchin authored Oct 1, 2024
1 parent cf196f3 commit 7f02355
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 48 deletions.
3 changes: 2 additions & 1 deletion ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ Starting with v1.31.6, this file will contain a record of major features and upd

## Upcoming

- Add documentation for group keys in `%%graph_notebook_vis_options` ([Link to PR](https://github.com/aws/graph-notebook/pull/703))
- Added experimental TinkerPop 4.0 support ([Link to PR](https://github.com/aws/graph-notebook/pull/704))
- Added documentation for group keys in `%%graph_notebook_vis_options` ([Link to PR](https://github.com/aws/graph-notebook/pull/703))
- Enabled `--query-timeout` on `%%oc explain` for Neptune Analytics ([Link to PR](https://github.com/aws/graph-notebook/pull/701))

## Release 4.6.0 (September 19, 2024)
Expand Down
15 changes: 8 additions & 7 deletions src/graph_notebook/configuration/generate_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
GRAPHBINARYV1, GREMLIN_SERIALIZERS_HTTP, GREMLIN_SERIALIZERS_WS,
GREMLIN_SERIALIZERS_ALL, NEPTUNE_GREMLIN_SERIALIZERS_HTTP,
DEFAULT_GREMLIN_WS_SERIALIZER, DEFAULT_GREMLIN_HTTP_SERIALIZER,
NEPTUNE_GREMLIN_SERIALIZERS_HTTP_NEXT, DEFAULT_GREMLIN_HTTP_SERIALIZER_NEXT,
NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME,
normalize_service_name, normalize_protocol_name,
normalize_serializer_class_name)
Expand Down Expand Up @@ -93,16 +94,16 @@ def __init__(self, traversal_source: str = '', username: str = '', password: str
print(f"Enforcing HTTP protocol.")
connection_protocol = DEFAULT_HTTP_PROTOCOL
# temporary restriction until GraphSON-typed and GraphBinary results are supported
if message_serializer not in NEPTUNE_GREMLIN_SERIALIZERS_HTTP:
if message_serializer not in NEPTUNE_GREMLIN_SERIALIZERS_HTTP_NEXT:
if message_serializer not in GREMLIN_SERIALIZERS_ALL:
if invalid_serializer_input:
print(f"Invalid serializer specified, defaulting to {DEFAULT_GREMLIN_HTTP_SERIALIZER}. "
f"Valid serializers: {NEPTUNE_GREMLIN_SERIALIZERS_HTTP}")
print(f"Invalid serializer specified, defaulting to {DEFAULT_GREMLIN_HTTP_SERIALIZER_NEXT}. "
f"Valid serializers: {NEPTUNE_GREMLIN_SERIALIZERS_HTTP_NEXT}")
else:
print(f"{message_serializer} is not currently supported for HTTP connections, "
f"defaulting to {DEFAULT_GREMLIN_HTTP_SERIALIZER}. "
f"Please use one of: {NEPTUNE_GREMLIN_SERIALIZERS_HTTP}")
message_serializer = DEFAULT_GREMLIN_HTTP_SERIALIZER
f"defaulting to {DEFAULT_GREMLIN_HTTP_SERIALIZER_NEXT}. "
f"Please use one of: {NEPTUNE_GREMLIN_SERIALIZERS_HTTP_NEXT}")
message_serializer = DEFAULT_GREMLIN_HTTP_SERIALIZER_NEXT
else:
if connection_protocol not in [DEFAULT_WS_PROTOCOL, DEFAULT_HTTP_PROTOCOL]:
if invalid_protocol_input:
Expand Down Expand Up @@ -342,7 +343,7 @@ def generate_default_config():
parser.add_argument("--gremlin_password", help="the password to use when creating Gremlin connections", default='')
parser.add_argument("--gremlin_serializer",
help="the serializer to use as the encoding format when creating Gremlin connections",
default=DEFAULT_GREMLIN_SERIALIZER)
default='')
parser.add_argument("--gremlin_connection_protocol",
help="the connection protocol to use for Gremlin connections",
default='')
Expand Down
63 changes: 52 additions & 11 deletions src/graph_notebook/magics/graph_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, \
GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, \
GREMLIN_SERIALIZERS_WS, GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP, normalize_protocol_name, generate_snapshot_name)
GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, GRAPHSONV4_UNTYPED, \
GREMLIN_SERIALIZERS_WS, get_gremlin_serializer_mime, normalize_protocol_name, generate_snapshot_name)
from graph_notebook.network import SPARQLNetwork
from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork
from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns
Expand Down Expand Up @@ -1091,6 +1091,9 @@ def gremlin(self, line, cell, local_ns: dict = None):
f'If not specified, defaults to the value of the gremlin.connection_protocol field '
f'in %%graph_notebook_config. Please note that this option has no effect on the '
f'Profile and Explain modes, which must use HTTP.')
parser.add_argument('-qp', '--query-parameters', type=str, default='',
help='Parameter definitions to apply to the query. This option can accept a local variable '
'name, or a string representation of the map.')
parser.add_argument('--explain-type', type=str.lower, default='dynamic',
help=f'Explain mode to use when using the explain query mode. '
f'Accepted values: {GREMLIN_EXPLAIN_MODES}')
Expand Down Expand Up @@ -1160,6 +1163,21 @@ def gremlin(self, line, cell, local_ns: dict = None):
logger.debug(f'Arguments {args}')
results_df = None

query_params = None
if args.query_parameters:
if args.query_parameters in local_ns:
query_params_input = local_ns[args.query_parameters]
else:
query_params_input = args.query_parameters
if isinstance(query_params_input, dict):
query_params = json.dumps(query_params_input)
else:
try:
query_params_dict = json.loads(query_params_input.replace("'", '"'))
query_params = json.dumps(query_params_dict)
except Exception as e:
print(f"Invalid query parameter input, ignoring.")

if args.no_scroll:
gremlin_layout = UNRESTRICTED_LAYOUT
gremlin_scrollY = True
Expand All @@ -1184,8 +1202,13 @@ def gremlin(self, line, cell, local_ns: dict = None):

if mode == QueryMode.EXPLAIN:
try:
explain_args = {}
if args.explain_type:
explain_args['explain.mode'] = args.explain_type
if self.client.is_analytics_domain() and query_params:
explain_args['parameters'] = query_params
res = self.client.gremlin_explain(cell,
args={'explain.mode': args.explain_type} if args.explain_type else {})
args=explain_args)
res.raise_for_status()
except Exception as e:
if self.client.is_analytics_domain():
Expand Down Expand Up @@ -1219,6 +1242,8 @@ def gremlin(self, line, cell, local_ns: dict = None):
"profile.serializer": serializer,
"profile.indexOps": args.profile_indexOps,
"profile.debug": args.profile_debug}
if self.client.is_analytics_domain() and query_params:
profile_args['parameters'] = query_params
try:
profile_misc_args_dict = json.loads(args.profile_misc_args)
profile_args.update(profile_misc_args_dict)
Expand Down Expand Up @@ -1269,17 +1294,29 @@ def gremlin(self, line, cell, local_ns: dict = None):
try:
if connection_protocol == DEFAULT_HTTP_PROTOCOL:
using_http = True
headers = {}
message_serializer = self.graph_notebook_config.gremlin.message_serializer
message_serializer_mime = GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[message_serializer]
query_res_http = self.client.gremlin_http_query(cell, headers={
'Accept': message_serializer_mime})
message_serializer_mime = get_gremlin_serializer_mime(message_serializer, DEFAULT_HTTP_PROTOCOL)
if message_serializer_mime != GRAPHSONV4_UNTYPED:
headers['Accept'] = message_serializer_mime
passed_params = query_params if self.client.is_analytics_domain() else None
query_res_http = self.client.gremlin_http_query(cell,
headers=headers,
query_params=passed_params)
query_res_http.raise_for_status()
try:
query_res_http_json = query_res_http.json()
except JSONDecodeError:
query_res_fixed = repair_json(query_res_http.text)
query_res_http_json = json.loads(query_res_fixed)
query_res = query_res_http_json['result']['data']
if 'result' in query_res_http_json:
query_res = query_res_http_json['result']['data']
else:
if 'reason' in query_res_http_json:
logger.debug('Query failed with internal error, see response.')
else:
logger.debug('Received unexpected response format, outputting as single entry.')
query_res = [query_res_http_json]
else:
query_res = self.client.gremlin_query(cell, transport_args=transport_args)
except Exception as e:
Expand Down Expand Up @@ -1317,7 +1354,7 @@ def gremlin(self, line, cell, local_ns: dict = None):
ignore_groups=args.ignore_groups,
using_http=using_http)

if using_http and 'path()' in cell and query_res:
if using_http and 'path()' in cell and query_res and isinstance(query_res, list):
first_path = query_res[0]
if isinstance(first_path, dict) and first_path.keys() == {'labels', 'objects'}:
query_res_to_path_type = []
Expand Down Expand Up @@ -2844,8 +2881,8 @@ def seed(self, line, local_ns: dict = None):

if self.client.is_analytics_domain():
model_options = SEED_MODEL_OPTIONS_PG
custom_language_options = SEED_LANGUAGE_OPTIONS_OC
samples_pg_language_options = SEED_LANGUAGE_OPTIONS_OC
custom_language_options = SEED_LANGUAGE_OPTIONS_PG
samples_pg_language_options = SEED_LANGUAGE_OPTIONS_PG
else:
model_options = SEED_MODEL_OPTIONS
custom_language_options = SEED_LANGUAGE_OPTIONS
Expand Down Expand Up @@ -3121,7 +3158,11 @@ def process_gremlin_query_line(query_line, line_index, q):
logger.debug(f"Skipped blank query at line {line_index + 1} in seed file {q['name']}")
return 0
try:
self.client.gremlin_query(query_line)
if self.client.is_neptune_domain() and self.client.is_analytics_domain() and \
self.graph_notebook_config.gremlin.connection_protocol == DEFAULT_HTTP_PROTOCOL:
self.client.gremlin_http_query(query_line)
else:
self.client.gremlin_query(query_line)
return 0
except GremlinServerError as gremlinEx:
try:
Expand Down
27 changes: 22 additions & 5 deletions src/graph_notebook/neptune/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,27 +122,34 @@
GRAPHSONV1 = 'GraphSONMessageSerializerGremlinV1'
GRAPHSONV2 = 'GraphSONMessageSerializerV2'
GRAPHSONV3 = 'GraphSONMessageSerializerV3'
GRAPHSONV4 = 'GraphSONMessageSerializerV4'
GRAPHSONV1_UNTYPED = 'GraphSONUntypedMessageSerializerV1'
GRAPHSONV2_UNTYPED = 'GraphSONUntypedMessageSerializerV2'
GRAPHSONV3_UNTYPED = 'GraphSONUntypedMessageSerializerV3'
GRAPHSONV4_UNTYPED = 'GraphSONUntypedMessageSerializerV4'
GRAPHBINARYV1 = 'GraphBinaryMessageSerializerV1'

GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP = {
GRAPHSONV1: 'application/vnd.gremlin-v1.0+json',
GRAPHSONV2: 'application/vnd.gremlin-v2.0+json',
GRAPHSONV3: 'application/vnd.gremlin-v3.0+json',
GRAPHSONV4: 'application/vnd.gremlin-v4.0+json',
GRAPHSONV1_UNTYPED: 'application/vnd.gremlin-v1.0+json;types=false',
GRAPHSONV2_UNTYPED: 'application/vnd.gremlin-v2.0+json;types=false',
GRAPHSONV3_UNTYPED: 'application/vnd.gremlin-v3.0+json;types=false',
GRAPHSONV4_UNTYPED: 'application/vnd.gremlin-v4.0+json;types=false',
GRAPHBINARYV1: 'application/vnd.graphbinary-v1.0'
}

GREMLIN_SERIALIZERS_WS = [GRAPHSONV2, GRAPHSONV3, GRAPHBINARYV1]
GREMLIN_SERIALIZERS_HTTP = [GRAPHSONV1, GRAPHSONV1_UNTYPED, GRAPHSONV2_UNTYPED, GRAPHSONV3_UNTYPED]
GREMLIN_SERIALIZERS_ALL = GREMLIN_SERIALIZERS_WS + GREMLIN_SERIALIZERS_HTTP
GREMLIN_SERIALIZERS_HTTP_NEXT = [GRAPHSONV4, GRAPHSONV4_UNTYPED]
GREMLIN_SERIALIZERS_ALL = GREMLIN_SERIALIZERS_WS + GREMLIN_SERIALIZERS_HTTP + GREMLIN_SERIALIZERS_HTTP_NEXT
NEPTUNE_GREMLIN_SERIALIZERS_HTTP = [GRAPHSONV1_UNTYPED, GRAPHSONV2_UNTYPED, GRAPHSONV3_UNTYPED]
NEPTUNE_GREMLIN_SERIALIZERS_HTTP_NEXT = NEPTUNE_GREMLIN_SERIALIZERS_HTTP + [GRAPHSONV4_UNTYPED]
DEFAULT_GREMLIN_WS_SERIALIZER = GRAPHSONV3
DEFAULT_GREMLIN_HTTP_SERIALIZER = GRAPHSONV3_UNTYPED
DEFAULT_GREMLIN_HTTP_SERIALIZER_NEXT = GRAPHSONV4_UNTYPED
DEFAULT_GREMLIN_SERIALIZER = GRAPHSONV3_UNTYPED

DEFAULT_WS_PROTOCOL = "websockets"
Expand Down Expand Up @@ -184,11 +191,14 @@ def get_gremlin_serializer_driver_class(serializer_str: str):
return serializer.GraphSONSerializersV3d0()


def get_gremlin_serializer_mime(serializer_str: str):
def get_gremlin_serializer_mime(serializer_str: str, protocol: str = DEFAULT_GREMLIN_PROTOCOL):
if serializer_str in GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP.keys():
return GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[serializer_str]
else:
return GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[GRAPHSONV1_UNTYPED]
default_serializer_for_protocol = DEFAULT_GREMLIN_HTTP_SERIALIZER if protocol == DEFAULT_HTTP_PROTOCOL \
else DEFAULT_GREMLIN_WS_SERIALIZER
print(f"Invalid serializer, defaulting to {default_serializer_for_protocol}")
return GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[default_serializer_for_protocol]


def normalize_protocol_name(protocol: str):
Expand Down Expand Up @@ -218,8 +228,10 @@ def normalize_serializer_class_name(serializer: str):
message_serializer += 'MessageSerializerGremlinV1'
elif 'v2' in serializer_lower:
message_serializer += 'MessageSerializerV2'
else:
elif 'v3' in serializer_lower:
message_serializer += 'MessageSerializerV3'
else:
message_serializer += 'MessageSerializerV4'
elif 'graphbinary' in serializer_lower:
message_serializer = GRAPHBINARYV1
else:
Expand Down Expand Up @@ -454,7 +466,7 @@ def gremlin_query(self, query, transport_args=None, bindings=None):
c.close()
raise e

def gremlin_http_query(self, query, headers=None) -> requests.Response:
def gremlin_http_query(self, query, headers=None, query_params: dict = None) -> requests.Response:
if headers is None:
headers = {}

Expand All @@ -465,6 +477,8 @@ def gremlin_http_query(self, query, headers=None) -> requests.Response:
data['query'] = query
data['language'] = 'gremlin'
headers['content-type'] = 'application/json'
if query_params:
data['parameters'] = str(query_params).replace("'", '"')
else:
uri = f'{self.get_uri(use_websocket=False, use_proxy=use_proxy)}/gremlin'
data['gremlin'] = query
Expand Down Expand Up @@ -499,6 +513,9 @@ def _gremlin_query_plan(self, query: str, plan_type: str, args: dict, ) -> reque
data['query'] = query
data['language'] = 'gremlin'
headers['content-type'] = 'application/json'
if 'parameters' in args:
query_params = args.pop('parameters')
data['parameters'] = str(query_params).replace("'", '"')
if plan_type == 'explain':
# Remove explain.mode once HTTP is changed
explain_mode = args.pop('explain.mode')
Expand Down
Loading

0 comments on commit 7f02355

Please sign in to comment.