#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Author: freezed 2018-07-24 Version: 0.1 Licence: `GNU GPL v3` GNU GPL v3: http://www.gnu.org/licenses/ Call Open Food Facts API to populate a local MariaDB/MySQL database with product data This DB will serve an CLI client which gives alternative products with better nurition grade. """ import json import requests from config import FIELD_KEPT, API_URL_CAT def get_product(code, from_file=False): """ Call Open Food Facts API to get data of a single product """ ERR_FILE = "File load error : {}" filename = 'sample/product-{}.json'.format(str(code)) try: int(code) except ValueError: # as except_detail: # print("Exception: «{}»".format(except_detail)) print(ERR_FILE.format(filename)) else: if from_file: from os import path # File did not exists if path.isfile(filename) is False: print(ERR_FILE.format(filename)) status = 404 product_json = {'status': 0} else: with open(filename, "r") as json_file: product_json = json.loads(json_file.read()) status = 200 else: response = requests.get( "https://fr.openfoodfacts.org/api/v0/product/{}.json".format(code) ) product_json = json.loads(response.text) status = response.status_code if product_json['status'] and status == 200: product_kept = { 'code': code, 'url': "https://fr.openfoodfacts.org/product/{}/".format(code) } for field in FIELD_KEPT['product']: product_kept[field] = product_json['product'][field] return product_kept else: return False def get_category(name, from_file=False): """ Call Open Food Facts API to get data of products in a single category :return: Dict filled with products & kept fields First try, TODO : - work offline with local JSON - need to get all the products of a category :Tests ONLINE: >>> prod_false = get_category('1664') >>> prod_false False >>> prod_bles = get_category('blés') :Tests OFFLINE: # >>> prod_bles = get_category('biscuits', True) >>> prod_bles['category'] == 'biscuits' True >>> 'count' in prod_bles True >>> 'product_name' in prod_bles['products'][0] True >>> 'nutrition_grades' in prod_bles['products'][0] True >>> 'categories_tags' in prod_bles['products'][0] True >>> get_category('wrong_file', True) File load error : sample/category-wrong_file.json False # >>> pprint.pprint(prod_bles) """ if from_file: from os import path filename = 'sample/category-{}.json'.format(str(name)) # File did not exists if path.isfile(filename) is False: print("File load error : {}".format(filename)) status = 404 cat_json = {'count': 0} else: with open(filename, "r") as json_file: cat_json = json.loads(json_file.read()) status = 200 # Requests over API else: page = 1 response = requests.get(API_URL_CAT.format(str(name), page)) cat_json = json.loads(response.text) status = response.status_code # Gets data if cat_json['count'] > 0: # Defines dict it will be returned staging_data = { # 'count': cat_json['count'], 'category': str(name), 'products': [] } # Counts pages of this category total_pages = int(cat_json['count'] // cat_json['page_size']) if int(cat_json['count'] % cat_json['page_size']) > 0: total_pages += 1 # Loops on data from 1st page for idx, product_fields in enumerate(cat_json['products']): staging_data['products'].append(dict()) for field in FIELD_KEPT['category']: if field in product_fields: staging_data['products'][idx][field] = product_fields[field] else: staging_data['products'][idx][field] = False # Gets data for all other pages while page < total_pages: # Requests next page over API page += 1 response = requests.get(API_URL_CAT.format(str(name), page)) cat_json = json.loads(response.text) idx = len(staging_data['products']) for product_fields in cat_json['products']: staging_data['products'].append(dict()) for field in FIELD_KEPT['category']: if field in product_fields: staging_data['products'][idx][field] = product_fields[field] else: staging_data['products'][idx][field] = False idx += 1 print("\t\t[…finish page {}/{} - {} ids]".format(page, total_pages, idx)) return staging_data else: return False def false_to_null(sql_list): """ Replacing nutrition_score="False" by nutrition_score=NULL """ for idx, request in enumerate(sql_list): if "False" in request: sql_list[idx] = "{}NULL{}".format( request[:request.find('False')-1], request[request.find('False')+6:] ) return sql_list def pick_category(cat_list): """ Picks only one category to associate the product in the local DB One of the shortest tag (without langage prefix) is taken. For improvement it is a good place to adds more work here, like selecting by langage prefix. :Tests: >>> pick_category(['en:sugary-snacks', 'en:biscuits-and-cakes', \ 'en:biscuits']) 'biscuits' """ if len(cat_list) > 1: # get idx of the shortest tag flip_list = [(len(cat), idx) for idx, cat in enumerate(cat_list)] flip_list.sort() shortest_tag_idx = flip_list[0][1] return cat_list[shortest_tag_idx].split(":")[1] elif len(cat_list) == 1: return cat_list[0].split(":")[1] else: return False def sql_generator(staging_data): """ Uses `staging_data` to generate SQL INSERT requests. :staging_data: dict() created with `get_product()` or `get_category()` :return: list() of SQL requests :Tests: >>> sql_generator(False) is False True >>> bisc = {'count': 4377,'category':'biscuits','products':[{'_id':'8480000141323','categories_tags':['en:sugary-snacks','en:biscuits-and-cakes','en:biscuits'],'nutrition_grades':'e','product_name':'Galletas María Dorada Hacendado','url':'https://fr-en.openfoodfacts.org/product/8480000141323/galletas-maria-dorada-hacendado'},{'_id':'3593551174971','categories_tags':['en:sugary-snacks','en:biscuits-and-cakes','en:biscuits'],'nutrition_grades':'False','product_name':'Les Broyés du Poitou','url':'https://fr-en.openfoodfacts.org/product/3593551174971/les-broyes-du-poitou-les-mousquetaires'}]} >>> sql_list_bisc = sql_generator(bisc) >>> sql_list_bisc[0] "INSERT INTO category (`name`) VALUES ('biscuits');" >>> sql_list_bisc[1] 'INSERT INTO product (`name`, `code`, `url`, `nutrition_grades`, `category_id`) SELECT "Galletas María Dorada Hacendado", "8480000141323", "https://fr-en.openfoodfacts.org/product/8480000141323/galletas-maria-dorada-hacendado", "e", id AS category_id FROM category WHERE name = "biscuits";' >>> sql_list_bisc[2] 'INSERT INTO product (`name`, `code`, `url`, `nutrition_grades`, `category_id`) SELECT "Les Broyés du Poitou", "3593551174971", "https://fr-en.openfoodfacts.org/product/3593551174971/les-broyes-du-poitou-les-mousquetaires", NULL, id AS category_id FROM category WHERE name = "biscuits";' >>> oreo = {'categories_tags':['en:sugary-snacks','en:biscuits-and-cakes','en:biscuits','en:chocolate-biscuits','es:sandwich-cookies'],'code':'8410000810004','nutrition_grades':'e','product_name':'Biscuit Oreo', 'url':'https://fr.openfoodfacts.org/product/8410000810004/'} >>> sql_list_oreo = sql_generator(oreo) >>> sql_list_oreo[0] "INSERT INTO category (`name`) VALUES ('biscuits');" >>> sql_list_oreo[1] 'INSERT INTO product (`name`, `code`, `url`, `nutrition_grades`, `category_id`) SELECT "Biscuit Oreo", "8410000810004", "https://fr.openfoodfacts.org/product/8410000810004/", "e", id AS category_id FROM category WHERE name = "biscuits";' >>> oreo_nutri_null = {'categories_tags':['en:sugary-snacks','en:biscuits-and-cakes','en:biscuits','en:chocolate-biscuits','es:sandwich-cookies'],'code':'8410000810004','nutrition_grades':'False','product_name':'Biscuit Oreo', 'url':'https://fr.openfoodfacts.org/product/8410000810004/'} >>> sql_list_oreo_nutri_null = sql_generator(oreo_nutri_null) >>> sql_list_oreo_nutri_null[1] 'INSERT INTO product (`name`, `code`, `url`, `nutrition_grades`, `category_id`) SELECT "Biscuit Oreo", "8410000810004", "https://fr.openfoodfacts.org/product/8410000810004/", NULL, id AS category_id FROM category WHERE name = "biscuits";' """ sql_list = [] insert_cat = "INSERT INTO category (`name`) VALUES ('{}');" insert_prod = """INSERT INTO product (`name`, `code`, `url`, `nutrition_grades`, `category_id`) \ SELECT "{name}", "{code}", "{url}", "{nutri}", id AS category_id \ FROM category \ WHERE name = "{cat}";""" if staging_data is not False and 'category' in staging_data.keys(): used_category = staging_data['category'] # insert category sql_list.append(insert_cat.format(used_category)) # insert products for idx, val in enumerate(staging_data['products']): sql_list.append( insert_prod.format( code=val['_id'], url=val['url'], name=val['product_name'], nutri=val['nutrition_grades'], cat=used_category ) ) elif staging_data is not False and 'product_name' in staging_data.keys(): used_category = pick_category(staging_data['categories_tags']) # insert category sql_list.append(insert_cat.format(used_category)) sql_list.append( insert_prod.format( code=staging_data['code'], url=staging_data['url'], name=staging_data['product_name'], nutri=staging_data['nutrition_grades'], cat=used_category ) ) else: sql_list = False if sql_list is not False: sql_list = false_to_null(sql_list) return sql_list if __name__ == "__main__": import doctest doctest.testmod()