From cc163782337cbfb3e0085e6a14a2b983865b7ce6 Mon Sep 17 00:00:00 2001 From: Freezed <2160318-free_zed@users.noreply.gitlab.com> Date: Mon, 10 Oct 2022 09:35:09 +0200 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=8E=A8=20Remove=20std=20case=20from?= =?UTF-8?q?=20conditional=20stmt=20#12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ldpy.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ldpy.py b/ldpy.py index 4915886..f2bdb12 100755 --- a/ldpy.py +++ b/ldpy.py @@ -115,16 +115,15 @@ def parse_args(args=sys.argv[1:]): def get_last_entries(entries_nb): """Get the last n entries from a stream.""" - if entries_nb in LAST_E_CHOICES: - logger.debug("Wait before getting '%s' entries!", entries_nb) - query = {"size": entries_nb} - last_entries = client.opnsrch_clt.search(body=query) - logger.debug(pf(last_entries)) - - else: + if entries_nb not in LAST_E_CHOICES: logger.critical("'%s' is not in '%s'", entries_nb, LAST_E_CHOICES) raise ValueError + logger.debug("Wait before getting '%s' entries!", entries_nb) + query = {"size": entries_nb} + last_entries = client.opnsrch_clt.search(body=query) + logger.debug(pf(last_entries)) + return last_entries From 6ed57de1aa39309f8bbc1044a7b29645452a4dba Mon Sep 17 00:00:00 2001 From: Freezed <2160318-free_zed@users.noreply.gitlab.com> Date: Mon, 10 Oct 2022 09:58:44 +0200 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=8E=A8=20Normalize=20field=20strippin?= =?UTF-8?q?g=20#12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `strip_std_field()` will be used to strip fields from the `get_map_props()` in the next iterations. This modification aim to format data passed to `strip_std_field()` then we can use it easyly later for other options. --- ldpy.py | 4 ++-- tests_ldpy.py | 45 ++++++++++++++++++++------------------------- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/ldpy.py b/ldpy.py index f2bdb12..7de8508 100755 --- a/ldpy.py +++ b/ldpy.py @@ -124,7 +124,7 @@ def get_last_entries(entries_nb): last_entries = client.opnsrch_clt.search(body=query) logger.debug(pf(last_entries)) - return last_entries + return last_entries["hits"]["hits"] def get_map_props(): @@ -154,7 +154,7 @@ def strip_std_field(initial_entries): Returns a list populated with a dict for each entry. """ stripped_entries = [] - for initial_hit in initial_entries["hits"]["hits"]: + for initial_hit in initial_entries: for key in STD_FIELDS: del initial_hit["_source"][key] diff --git a/tests_ldpy.py b/tests_ldpy.py index 010a7db..a5f2245 100644 --- a/tests_ldpy.py +++ b/tests_ldpy.py @@ -90,31 +90,26 @@ def test_parse_args_last_const(): @mark.parametrize("fields", [META_FIELDS, STD_FIELDS]) def test_strip_std_field(fields): """Remove keys/values present in all LDP stream.""" - payload = { - "_shards": {}, - "hits": { - "hits": [ - { - "_id": "-", - "_source": { - "category": "-", - "gl2_source_input": "", - "gl2_source_node": "-", - "id": "-", - "message": "-", - "rating_num": 42, - "source": "-", - "streams": [""], - "timestamp": "2022-10-04 20:59:22.364402", - "title": "-", - "X-OVH-CONTENT-SIZE": 42, - "X-OVH-DELIVERY-DATE": "2022-10-04T20:59:22.578894878Z", - "X-OVH-INPUT": "", - }, - }, - ], + payload = [ + { + "_id": "-", + "_source": { + "category": "-", + "gl2_source_input": "", + "gl2_source_node": "-", + "id": "-", + "message": "-", + "rating_num": 42, + "source": "-", + "streams": [""], + "timestamp": "2022-10-04 20:59:22.364402", + "title": "-", + "X-OVH-CONTENT-SIZE": 42, + "X-OVH-DELIVERY-DATE": "2022-10-04T20:59:22.578894878Z", + "X-OVH-INPUT": "", + }, }, - } + ] stripped_entry = ldpy.strip_std_field(payload)[0] assert isinstance(stripped_entry, dict) @@ -138,7 +133,7 @@ def test_get_last_entries_out_of_range(entry_np): def test_get_last_entries_in_range(entry_np): """Value is in range for the last entries.""" response = ldpy.get_last_entries(entry_np) - assert len(response["hits"]["hits"]) == entry_np + assert len(response) == entry_np # ### From 949520d2608fcffd2b94393937d01132a414e8bf Mon Sep 17 00:00:00 2001 From: Freezed <2160318-free_zed@users.noreply.gitlab.com> Date: Tue, 11 Oct 2022 01:11:31 +0200 Subject: [PATCH 3/4] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Build=20request=20with?= =?UTF-8?q?=20stream=20specific=20fields=20#12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All LDP stream shares standards fields This commit remove those from the stream mapping and build a request asking only for the stream specific fields. --- ldpy.py | 41 ++++++++++++++++--------------- tests_ldpy.py | 67 ++++++++++++++++++++++++++++----------------------- 2 files changed, 59 insertions(+), 49 deletions(-) diff --git a/ldpy.py b/ldpy.py index 7de8508..f51519f 100755 --- a/ldpy.py +++ b/ldpy.py @@ -113,33 +113,38 @@ def parse_args(args=sys.argv[1:]): return parser.parse_args(args) -def get_last_entries(entries_nb): +def request_last_entries(entries_nb, fields=[]): """Get the last n entries from a stream.""" if entries_nb not in LAST_E_CHOICES: logger.critical("'%s' is not in '%s'", entries_nb, LAST_E_CHOICES) raise ValueError - logger.debug("Wait before getting '%s' entries!", entries_nb) - query = {"size": entries_nb} - last_entries = client.opnsrch_clt.search(body=query) + logger.debug("Requesting last '%s' entries…", entries_nb) + last_entries = client.opnsrch_clt.search( + body={"size": entries_nb, "_source": fields} + ) + logger.debug(pf(last_entries)) return last_entries["hits"]["hits"] -def get_map_props(): +def request_map_props(): """Get stream mapping. LDP indices are rolling and LDP do not implement the call to get indices related to an alias. """ + logger.debug("Requesting mapping for '%s' stream…", client.LDP_STREAM_NAME) + try: indices = list(client.opnsrch_clt.indices.get_alias("*")) mapping = client.opnsrch_clt.indices.get_mapping(client.LDP_ALIAS)[indices[0]][ "mappings" ] map_props = sorted(list(mapping["properties"])) - logger.debug("Mapping found for alias: '%s'", client.LDP_ALIAS) + logger.debug("Mapping received for alias: '%s'", client.LDP_ALIAS) + logger.debug(map_props) except AuthorizationException: map_props = None @@ -148,33 +153,31 @@ def get_map_props(): return map_props -def strip_std_field(initial_entries): +def strip_std_field(fields): """Remove standard LDP fields to ease human reading. - Returns a list populated with a dict for each entry. + Returns a list populated with a list of stream specific fields. """ - stripped_entries = [] - for initial_hit in initial_entries: + for field in STD_FIELDS: + fields.remove(field) - for key in STD_FIELDS: - del initial_hit["_source"][key] - - stripped_entries.append(initial_hit["_source"]) - - return stripped_entries + return fields def main(options): """Execute as script. Functions related to the arguments passed.""" if options.mapping: - response = get_map_props() - logger.debug("Mapping for '%s' stream: %s", client.LDP_STREAM_NAME, response) + response = request_map_props() + elif options.last: - response = strip_std_field(get_last_entries(options.last)) + response = request_last_entries( + options.last, strip_std_field(request_map_props()) + ) logger.debug( "Last '%s' entries for '%s' stream:", options.last, client.LDP_STREAM_NAME ) logger.debug(response) + else: raise NotImplementedError diff --git a/tests_ldpy.py b/tests_ldpy.py index a5f2245..d778e3f 100644 --- a/tests_ldpy.py +++ b/tests_ldpy.py @@ -19,6 +19,7 @@ from ldpy import LAST_E_CHOICES, STD_FIELDS LAST_E_MIN = LAST_E_CHOICES[0] - 1 LAST_E_MAX = len(LAST_E_CHOICES) + 1 META_FIELDS = ["_id", "_source", "_shards", "hits"] +DEMO_STREAM_FIELDS = ["rating_num", "message", "title", "id", "category"] # Faking options from argparse @@ -89,51 +90,57 @@ def test_parse_args_last_const(): # ### @mark.parametrize("fields", [META_FIELDS, STD_FIELDS]) def test_strip_std_field(fields): - """Remove keys/values present in all LDP stream.""" + """Remove fields present in all LDP stream.""" payload = [ - { - "_id": "-", - "_source": { - "category": "-", - "gl2_source_input": "", - "gl2_source_node": "-", - "id": "-", - "message": "-", - "rating_num": 42, - "source": "-", - "streams": [""], - "timestamp": "2022-10-04 20:59:22.364402", - "title": "-", - "X-OVH-CONTENT-SIZE": 42, - "X-OVH-DELIVERY-DATE": "2022-10-04T20:59:22.578894878Z", - "X-OVH-INPUT": "", - }, - }, + "category", + "gl2_source_input", + "gl2_source_node", + "id", + "message", + "rating_num", + "source", + "streams", + "timestamp", + "title", + "X-OVH-CONTENT-SIZE", + "X-OVH-DELIVERY-DATE", + "X-OVH-INPUT", ] - stripped_entry = ldpy.strip_std_field(payload)[0] + stripped_entry = ldpy.strip_std_field(payload) - assert isinstance(stripped_entry, dict) + assert isinstance(stripped_entry, list) for field in fields: assert field not in stripped_entry # ### -# Testing get_last_entries() +# Testing request_last_entries() # ### -@mark.parametrize("entry_np", [LAST_E_MIN, LAST_E_MAX]) -def test_get_last_entries_out_of_range(entry_np): +@mark.parametrize("opt_last", [LAST_E_MIN, LAST_E_MAX]) +def test_request_last_entries_out_of_range(opt_last): """Value is out of range for the last entries.""" with raises(ValueError): - ldpy.get_last_entries(entry_np) + ldpy.request_last_entries(opt_last) @pytest.mark.vcr() -@mark.parametrize("entry_np", LAST_E_CHOICES) -def test_get_last_entries_in_range(entry_np): +@mark.parametrize( + "opt_last, fields", + [ + (LAST_E_CHOICES[0], DEMO_STREAM_FIELDS), + (len(LAST_E_CHOICES), DEMO_STREAM_FIELDS), + ], +) +def test_request_last_entries_in_range(opt_last, fields): """Value is in range for the last entries.""" - response = ldpy.get_last_entries(entry_np) - assert len(response) == entry_np + response_id = opt_last - 1 + response = ldpy.request_last_entries(opt_last, fields) + + assert len(response) == opt_last + for field in STD_FIELDS: + assert field not in response[0]["_source"] + assert field not in response[response_id]["_source"] # ### @@ -151,7 +158,7 @@ def test_main_without_option(): def test_main_demo_with_mapping(): """Called with mapping option. - `main()` just tranfers `get_map_props()` return: this test is not really useful + `main()` just transfers `request_map_props()` return: this test is not really useful """ options = FakeOptions(["mapping"]) response = ldpy.main(options) From 00d3a3e915f9771c2df9885105ab6efcdffbbb8a Mon Sep 17 00:00:00 2001 From: Freezed <2160318-free_zed@users.noreply.gitlab.com> Date: Tue, 11 Oct 2022 02:32:54 +0200 Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=93=9D=20Update=20README=20#12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b0b22b8..050c367 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,9 @@ LDPy ==== -🚧 _work in progress_ 🚧 -------------------------- +This project moved to: [`git.afpy.org`](https://git.afpy.org/fcode/ldpy) -_Take a look on other branches if you are curious…_ +🚧 _work in progress, take a look on other branches if you are curious…_ --- @@ -26,7 +25,7 @@ An [OVH Log Data Platform](https://docs.ovh.com/fr/logs-data-platform/) python c Get the source & edit settings: ```bash -git clone git@gitlab.com:forga/tool/ovh/ldpy.git && cd ldpy +git clone git@git.afpy.org:fcode/LDPy.git && cd ldpy cp client.sample.py client.py ${EDITOR} client.py ``` @@ -40,13 +39,14 @@ pip install -r requirements.txt ``` * Get stream mapping: `./ldpy.py --mapping` +* Get last `n` entries: `./ldpy.py --last` 🚧 Devel quick-start -------------------- - Built with `Python 3.8` -- Code linting with [`flake8`](https://pypi.org/project/flake8/), [`pylint`](https://pypi.org/project/pylint), [`black`](https://pypi.org/project/black) & [pydocstyle](https://pypi.org/project/pydocstyle/). +- Code linting with [`flake8`](https://pypi.org/project/flake8/), [`pylint`](https://pypi.org/project/pylint), [`black`](https://pypi.org/project/black) & [`pydocstyle`](https://pypi.org/project/pydocstyle/). - Install development tools: * `pip install -r requirements-dev.txt` - A `Makefile` with tools : run `make help` to have a look