🎨 Normalize field stripping #12

`strip_std_field()` will be used to strip fields from the output of `get_map_props()` in the next iterations. This modification aims to normalize the data passed to `strip_std_field()` so that it can easily be reused later for other options.
🎨 Remove std case from conditional stmt #12
2022-10-10 09:58:44 +02:00 · 2022-10-10 09:35:09 +02:00 · 2022-10-05 23:57:33 +00:00 · 2022-10-06 01:56:31 +02:00
2 changed files with 67 additions and 65 deletions
--- a/ldpy.py
+++ b/ldpy.py
@ -14,15 +14,31 @@ import argparse
 import logging
 import os
 from pprint import pformat as pf
+from pprint import pprint as pp
 import sys

 from opensearchpy import AuthorizationException

 import client  # pylint: disable=import-error

-# Ranqe for the request retrieving the last entries.
+
+# ###
+# Config
+# ###
+# Range for the request retrieving the last entries.
 LAST_E_CHOICES = range(1, 51)

+STD_FIELDS = [
+    "X-OVH-CONTENT-SIZE",
+    "X-OVH-DELIVERY-DATE",
+    "X-OVH-INPUT",
+    "gl2_source_input",
+    "gl2_source_node",
+    "source",
+    "streams",
+    "timestamp",
+]
+
 logger = logging.getLogger(os.path.splitext(os.path.basename(sys.argv[0]))[0])


@ -99,17 +115,16 @@ def parse_args(args=sys.argv[1:]):

 def get_last_entries(entries_nb):
    """Get the last n entries from a stream."""
-    if entries_nb in LAST_E_CHOICES:
-        logger.debug("Wait before getting '%s' entries!", entries_nb)
-        query = {"size": entries_nb}
-        last_entries = client.opnsrch_clt.search(body=query)
-        logger.debug(pf(last_entries))
-
-    else:
+    if entries_nb not in LAST_E_CHOICES:
        logger.critical("'%s' is not in '%s'", entries_nb, LAST_E_CHOICES)
        raise ValueError

-    return last_entries
+    logger.debug("Wait before getting '%s' entries!", entries_nb)
+    query = {"size": entries_nb}
+    last_entries = client.opnsrch_clt.search(body=query)
+    logger.debug(pf(last_entries))
+
+    return last_entries["hits"]["hits"]


 def get_map_props():
@ -133,42 +148,29 @@ def get_map_props():
    return map_props


-def strip_demo_entries(raw_data):
-    """Remove keys in entries to ease human reading.
+def strip_std_field(initial_entries):
+    """Remove standard LDP fields to ease human reading.

    Returns a list populated with a dict for each entry.
-    This is a specific function for demo stream /!\
    """
-    stripped_data = []
+    stripped_entries = []
+    for initial_hit in initial_entries:

-    for raw_hit in raw_data["hits"]["hits"]:
+        for key in STD_FIELDS:
+            del initial_hit["_source"][key]

-        stripped_hit = {}
-        for key in [
-            "source",
-            "category",
-            "title",
-            "message",
-            "rating_num",
-            "timestamp",
-        ]:
-            stripped_hit[key] = raw_hit["_source"][key]
+        stripped_entries.append(initial_hit["_source"])

-        stripped_data.append(stripped_hit)
-
-    return stripped_data
+    return stripped_entries


 def main(options):
-    """Execute as script. Functions related to the arguments passed.
-
-    Data stripper use a function build only for demo stream: `strip_demo_entries()`
-    """
+    """Execute as script. Functions related to the arguments passed."""
    if options.mapping:
        response = get_map_props()
        logger.debug("Mapping for '%s' stream: %s", client.LDP_STREAM_NAME, response)
    elif options.last:
-        response = pf(strip_demo_entries(get_last_entries(options.last)))
+        response = strip_std_field(get_last_entries(options.last))
        logger.debug(
            "Last '%s' entries for '%s' stream:", options.last, client.LDP_STREAM_NAME
        )
@ -185,4 +187,4 @@ if __name__ == "__main__":
    result = main(pargs)

    if not pargs.debug and result:
-        print(result)
+        pp(result)
--- a/tests_ldpy.py
+++ b/tests_ldpy.py
@ -13,11 +13,12 @@ import pytest
 from pytest import mark, raises

 import ldpy
-from ldpy import LAST_E_CHOICES
+from ldpy import LAST_E_CHOICES, STD_FIELDS

 # Min and Max values for the request retrieving the last entries.
 LAST_E_MIN = LAST_E_CHOICES[0] - 1
 LAST_E_MAX = len(LAST_E_CHOICES) + 1
+META_FIELDS = ["_id", "_source", "_shards", "hits"]


 # Faking options from argparse
@ -84,39 +85,37 @@ def test_parse_args_last_const():


 # ###
-# Testing strip_demo_entries()
+# Testing strip_std_field()
 # ###
-def test_strip_demo_entries():
-    """Remove keys/values to ease human readinq in demo stream."""
-    payload = {
-        "hits": {
-            "hits": [
-                {
-                    "_id": "-",
-                    "_source": {
-                        "X-OVH-": "-",
-                        "X-OVH-INPUT": "-",
-                        "gl2_source": "-",
-                        "gl2_source_node": "-",
-                        "id": "-",
-                        "source": "-",
-                        "category": "-",
-                        "title": "-",
-                        "message": "-",
-                        "rating_num": 42,
-                        "streams": "-",
-                        "timestamp": "-",
-                    },
-                },
-            ],
+@mark.parametrize("fields", [META_FIELDS, STD_FIELDS])
+def test_strip_std_field(fields):
+    """Remove keys/values present in all LDP stream."""
+    payload = [
+        {
+            "_id": "-",
+            "_source": {
+                "category": "-",
+                "gl2_source_input": "",
+                "gl2_source_node": "-",
+                "id": "-",
+                "message": "-",
+                "rating_num": 42,
+                "source": "-",
+                "streams": [""],
+                "timestamp": "2022-10-04 20:59:22.364402",
+                "title": "-",
+                "X-OVH-CONTENT-SIZE": 42,
+                "X-OVH-DELIVERY-DATE": "2022-10-04T20:59:22.578894878Z",
+                "X-OVH-INPUT": "",
+            },
        },
-    }
-    stripped_entry = ldpy.strip_demo_entries(payload)[0]
+    ]
+    stripped_entry = ldpy.strip_std_field(payload)[0]

-    for key in ["source", "category", "title", "message", "rating_num", "timestamp"]:
-        stripped_entry.pop(key)
+    assert isinstance(stripped_entry, dict)

-    assert len(stripped_entry) == 0
+    for field in fields:
+        assert field not in stripped_entry


 # ###
@ -134,7 +133,7 @@ def test_get_last_entries_out_of_range(entry_np):
 def test_get_last_entries_in_range(entry_np):
    """Value is in range for the last entries."""
    response = ldpy.get_last_entries(entry_np)
-    assert len(response["hits"]["hits"]) == entry_np
+    assert len(response) == entry_np


 # ###
@ -166,4 +165,5 @@ def test_main_demo_with_last_const():
    options = FakeOptions(["last"])
    response = ldpy.main(options)

-    assert isinstance(response, str)
+    assert isinstance(response, list)
+    assert isinstance(response[0], dict)
Author	SHA1	Message	Date
Freezed	6ed57de1aa	🎨 Normalize field stripping #12 `strip_std_field()` will be used to strip fields from the output of `get_map_props()` in the next iterations. This modification aims to normalize the data passed to `strip_std_field()` so that it can easily be reused later for other options.	2022-10-10 09:58:44 +02:00
Freezed	cc16378233	🎨 Remove std case from conditional stmt #12	2022-10-10 09:35:09 +02:00
Freezed	4e01b8a5c7	🔀 Merge branch '13-std-fields' Resolve "Strip LDP standards fields" Closes #13 See merge request forga/tool/ovh/ldpy!4	2022-10-05 23:57:33 +00:00
Freezed	d1b19fa491	✨ Normalize fields stripping #13 The previous code relied on the demo's fields to remove the undesired ones. As LDP populates hits with some standard fields, this code just removes them. TODO #13: build a request to retrieve only the desired fields	2022-10-06 01:56:31 +02:00