Compare commits

...

4 Commits

Author SHA1 Message Date
Freezed 6ed57de1aa 🎨 Normalize field stripping #12
`strip_std_field()` will be used to strip fields from the `get_map_props()` in
the next iterations.
This modification aims to format the data passed to `strip_std_field()` so
that we can easily reuse it later for other options.
2022-10-10 09:58:44 +02:00
Freezed cc16378233 🎨 Remove std case from conditional stmt #12 2022-10-10 09:35:09 +02:00
Freezed 4e01b8a5c7 🔀 Merge branch '13-std-fields'
Resolve "Strip LDP standards fields"

Closes #13

See merge request forga/tool/ovh/ldpy!4
2022-10-05 23:57:33 +00:00
Freezed d1b19fa491 Normalize fields stripping #13
The previous code relied on the demo stream’s fields to remove the undesired ones.
As LDP populates hits with some standard fields, this code just removes them.
TODO #13 : build request to retrieve only desired fields
2022-10-06 01:56:31 +02:00
2 changed files with 67 additions and 65 deletions

68
ldpy.py
View File

@ -14,15 +14,31 @@ import argparse
import logging
import os
from pprint import pformat as pf
from pprint import pprint as pp
import sys
from opensearchpy import AuthorizationException
import client # pylint: disable=import-error
# Ranqe for the request retrieving the last entries.
# ###
# Config
# ###
# Range for the request retrieving the last entries.
LAST_E_CHOICES = range(1, 51)
STD_FIELDS = [
"X-OVH-CONTENT-SIZE",
"X-OVH-DELIVERY-DATE",
"X-OVH-INPUT",
"gl2_source_input",
"gl2_source_node",
"source",
"streams",
"timestamp",
]
logger = logging.getLogger(os.path.splitext(os.path.basename(sys.argv[0]))[0])
@ -99,17 +115,16 @@ def parse_args(args=sys.argv[1:]):
def get_last_entries(entries_nb):
"""Get the last n entries from a stream."""
if entries_nb in LAST_E_CHOICES:
logger.debug("Wait before getting '%s' entries!", entries_nb)
query = {"size": entries_nb}
last_entries = client.opnsrch_clt.search(body=query)
logger.debug(pf(last_entries))
else:
if entries_nb not in LAST_E_CHOICES:
logger.critical("'%s' is not in '%s'", entries_nb, LAST_E_CHOICES)
raise ValueError
return last_entries
logger.debug("Wait before getting '%s' entries!", entries_nb)
query = {"size": entries_nb}
last_entries = client.opnsrch_clt.search(body=query)
logger.debug(pf(last_entries))
return last_entries["hits"]["hits"]
def get_map_props():
@ -133,42 +148,29 @@ def get_map_props():
return map_props
def strip_demo_entries(raw_data):
"""Remove keys in entries to ease human reading.
def strip_std_field(initial_entries):
"""Remove standard LDP fields to ease human reading.
Returns a list populated with a dict for each entry.
This is a specific function for demo stream /!\
"""
stripped_data = []
stripped_entries = []
for initial_hit in initial_entries:
for raw_hit in raw_data["hits"]["hits"]:
for key in STD_FIELDS:
del initial_hit["_source"][key]
stripped_hit = {}
for key in [
"source",
"category",
"title",
"message",
"rating_num",
"timestamp",
]:
stripped_hit[key] = raw_hit["_source"][key]
stripped_entries.append(initial_hit["_source"])
stripped_data.append(stripped_hit)
return stripped_data
return stripped_entries
def main(options):
"""Execute as script. Functions related to the arguments passed.
Data stripper use a function build only for demo stream: `strip_demo_entries()`
"""
"""Execute as script. Functions related to the arguments passed."""
if options.mapping:
response = get_map_props()
logger.debug("Mapping for '%s' stream: %s", client.LDP_STREAM_NAME, response)
elif options.last:
response = pf(strip_demo_entries(get_last_entries(options.last)))
response = strip_std_field(get_last_entries(options.last))
logger.debug(
"Last '%s' entries for '%s' stream:", options.last, client.LDP_STREAM_NAME
)
@ -185,4 +187,4 @@ if __name__ == "__main__":
result = main(pargs)
if not pargs.debug and result:
print(result)
pp(result)

View File

@ -13,11 +13,12 @@ import pytest
from pytest import mark, raises
import ldpy
from ldpy import LAST_E_CHOICES
from ldpy import LAST_E_CHOICES, STD_FIELDS
# Min and Max values for the request retrieving the last entries.
LAST_E_MIN = LAST_E_CHOICES[0] - 1
LAST_E_MAX = len(LAST_E_CHOICES) + 1
META_FIELDS = ["_id", "_source", "_shards", "hits"]
# Faking options from argparse
@ -84,39 +85,37 @@ def test_parse_args_last_const():
# ###
# Testing strip_demo_entries()
# Testing strip_std_field()
# ###
def test_strip_demo_entries():
"""Remove keys/values to ease human readinq in demo stream."""
payload = {
"hits": {
"hits": [
{
"_id": "-",
"_source": {
"X-OVH-": "-",
"X-OVH-INPUT": "-",
"gl2_source": "-",
"gl2_source_node": "-",
"id": "-",
"source": "-",
"category": "-",
"title": "-",
"message": "-",
"rating_num": 42,
"streams": "-",
"timestamp": "-",
},
},
],
@mark.parametrize("fields", [META_FIELDS, STD_FIELDS])
def test_strip_std_field(fields):
"""Remove keys/values present in all LDP stream."""
payload = [
{
"_id": "-",
"_source": {
"category": "-",
"gl2_source_input": "",
"gl2_source_node": "-",
"id": "-",
"message": "-",
"rating_num": 42,
"source": "-",
"streams": [""],
"timestamp": "2022-10-04 20:59:22.364402",
"title": "-",
"X-OVH-CONTENT-SIZE": 42,
"X-OVH-DELIVERY-DATE": "2022-10-04T20:59:22.578894878Z",
"X-OVH-INPUT": "",
},
},
}
stripped_entry = ldpy.strip_demo_entries(payload)[0]
]
stripped_entry = ldpy.strip_std_field(payload)[0]
for key in ["source", "category", "title", "message", "rating_num", "timestamp"]:
stripped_entry.pop(key)
assert isinstance(stripped_entry, dict)
assert len(stripped_entry) == 0
for field in fields:
assert field not in stripped_entry
# ###
@ -134,7 +133,7 @@ def test_get_last_entries_out_of_range(entry_np):
def test_get_last_entries_in_range(entry_np):
"""Value is in range for the last entries."""
response = ldpy.get_last_entries(entry_np)
assert len(response["hits"]["hits"]) == entry_np
assert len(response) == entry_np
# ###
@ -166,4 +165,5 @@ def test_main_demo_with_last_const():
options = FakeOptions(["last"])
response = ldpy.main(options)
assert isinstance(response, str)
assert isinstance(response, list)
assert isinstance(response[0], dict)