Fluent Commerce Logo
Docs
Sign In

Bulk Extracts with GraphQL

How-to Guide

Author:

Movyn John

Changed on:

20 Nov 2023

Key Points

  • A python script can be used for ad-hoc extracts of a large number of GQL entities into a CSV file. 
  • Currently the Fluent platform does not support any bulk extracts. 
  • This script helps to extract data in bulk using the pagination functionality provided by the schema.
  • Depending on the amount of data that is extracted the script might run for a while.
  • The above code is provided as-is and is for reference purposes only. 

Steps

Step arrow right iconPre-Requisites

  • Python3 (brew install python3)
  • IDE (e.g. PyCharm)
  • Some developer/python experience

Step arrow right iconAfter setting up Python3 and a Python IDE, import the Python script below

1import json
2import csv
3import requests
4from graphqlclient import GraphQLClient
5from http.client import IncompleteRead
6from datetime import datetime
7 
# ######## CONFIGURATION SECTION START
# Exactly one page_query / entity / fields triplet must be active at a time;
# the alternatives below are kept commented out, ready to swap in.
page_query = '''
query extractInventoryPositions($after:String, $first:Int) {
  inventoryPositions(after:$after, first:$first){
    edges {
      node {
        ref
        status
        productRef
        locationRef
        onHand
        updatedOn
        createdOn
      }
      cursor
    }
    pageInfo {
      hasNextPage
    }
  }
}
'''

# page_query = '''
# query extractVariantProducts($after:String, $first:Int) {
#   variantProducts (after:$after, first:$first){
#     edges {
#       node {
#         ref
#         status
#         type
#         name
#         gtin
#         summary
#         updatedOn
#         createdOn
#       }
#       cursor
#     }
#     pageInfo {
#       hasNextPage
#     }
#   }
# }
# '''

# page_query = '''
# query extractVirtualPositions($after: String, $first: Int) {
#   virtualPositions(after: $after, first: $first) {
#     edges {
#       node {
#         ref
#         type
#         status
#         quantity
#         productRef
#         groupRef
#         updatedOn
#         createdOn
#       }
#       cursor
#     }
#     pageInfo {
#       hasNextPage
#     }
#   }
# }
# '''

# The {{...}} placeholders must be replaced with real account credentials
# before running the script.
accountId = '{{accountId}}'
oauth_url = '{{fluentApiHost}}/oauth/token?username={{retailerUsername}}&password={{retailerPassword}}&client_id={{accountId}}&client_secret={{clientSecret}}&grant_type=password&scope=api'

# needs to correspond to the GQL query fields.
entity = "inventoryPositions"
fields = ["ref", "status", "productRef", "locationRef", "onHand", "updatedOn", "createdOn"]
# entity = "variantProducts"
# fields = ["ref", "status", "type", "name", "gtin", "summary", "updatedOn", "createdOn"]
# entity = "virtualPositions"
# fields = ["ref", "type", "status", "quantity", "productRef", "groupRef", "updatedOn", "createdOn"]
# ######## CONFIGURATION SECTION END

graphql_url = 'https://' + accountId + '.api.fluentretail.com/graphql'
# Output file pattern: yyyymmdd_<accountId>_<entity>.csv
filename = datetime.now().strftime("%Y%m%d") + '_' + accountId + '_' + entity + '.csv'
90 
91 
def get_page_of_data(client, after, first):
    """Execute one page of `page_query` and return the parsed `data` payload."""
    raw_response = client.execute(page_query, {'after': after, 'first': first})
    return json.loads(raw_response)['data']
96 
97 
def get_token():
    """Request an OAuth access token and return it as a Bearer header value.

    Raises:
        RuntimeError: when the token endpoint does not answer 200, instead of
            continuing and failing later with a KeyError on the response body.
    """
    auth_token_response = requests.post(oauth_url)
    if auth_token_response.status_code != 200:
        raise RuntimeError(
            "Couldn't get auth token {}".format(auth_token_response.status_code))
    # NOTE(review): the full response body is no longer printed to avoid
    # leaking credentials into logs.
    access_token = 'Bearer ' + auth_token_response.json()['access_token']
    return access_token
107 
108 
def init_client():
    """Build a GraphQL client authenticated with a freshly obtained token."""
    gql_client = GraphQLClient(graphql_url)
    gql_client.inject_token(get_token())
    return gql_client
113 
114 
def get_all_data(client=None, all_labels=None, cursor=None, first=5000, retries_left=5):
    """Page through the GraphQL connection and return the accumulated edges.

    Args:
        client: GraphQL client; created lazily when None (the old default
            `client=init_client()` ran a network call at import time).
        all_labels: accumulator list; None avoids the shared-mutable-default bug.
        cursor: connection cursor to resume from, None for the first page.
        first: page size requested per call.
        retries_left: remaining retries after transient failures.

    The previous recursive version passed `retries_left` into the `first`
    positional slot on retry (so retries never decremented) and discarded the
    result of a successful retry; the loop below fixes both.
    """
    if client is None:
        client = init_client()
    if all_labels is None:
        all_labels = []
    while True:
        try:
            data = get_page_of_data(client, cursor, first)
        except IncompleteRead:
            # Connection dropped mid-response: re-authenticate, retry the page.
            if retries_left <= 0:
                print('retries exhausted')
                return all_labels
            retries_left -= 1
            print('reconnect and keep tracking')
            client = init_client()
            continue
        except Exception:
            if retries_left <= 0:
                print('retries exhausted')
                return all_labels
            retries_left -= 1
            print("error occurred for cursor: {} retrying with retries left:{}".format(cursor, retries_left))
            continue
        if data is None:
            # GraphQL errors can yield a null `data` payload; stop here.
            return all_labels
        new_labels = data[entity]['edges']
        all_labels.extend(new_labels)
        if not data[entity]['pageInfo']['hasNextPage']:
            return all_labels
        if new_labels:
            cursor = get_cursor(new_labels)
        # else: hasNextPage with an empty page — retry the same cursor,
        # matching the prior behavior.
145 
146 
def get_cursor(new_labels):
    """Return the cursor of the last edge that carries one, or None.

    The previous version walked backwards in a `while True` loop and could
    raise IndexError when no edge had a cursor; scanning in reverse and
    returning None handles that and the empty-list case uniformly.
    """
    for edge in reversed(new_labels):
        if 'cursor' in edge:
            return edge['cursor']
    return None
159 
160 
def write_to_csv(all_labels):
    """Write a header row then one row per edge to `filename`.

    Uses a context manager so the file handle is flushed and closed (the old
    version leaked it), and `newline=''` as the csv module requires.
    """
    with open(filename, "w", newline='') as out_file:
        writer = csv.writer(out_file, delimiter=',')
        writer.writerow(fields)
        for edge in all_labels:
            if edge:  # skip empty dicts, as before
                writer.writerow([edge['node'][field] for field in fields])
170 
171 
# Run the extract only when executed as a script, not when imported.
if __name__ == '__main__':
    result = get_all_data()
    write_to_csv(result)

Language: python

Name: Python code data extract

Description:

[Warning: empty required content area]

Step arrow right iconExtract various positions

The script is ready to extract 3 different entity types and the corresponding sections need to be commented out/in (other entities can be added to the script):

  • InventoryPositions: lines 10-29 & 83-84
  • VariantProducts: lines 31-51 & 85-86
  • VirtualPositions: lines 54-75 & 87-88

Step arrow right iconresult

After executing successfully a csv file is produced, which will follow the filename pattern

`yyyymmdd_{{accountId}}_{{entityType}}.csv`
e.g.
`20191122_FLUENT_virtualPositions.csv`

Step arrow right iconBulk extract on Store Location Hours

Another example script to extract Store Location Hours:


1import json
2import csv
3import requests
4from graphqlclient import GraphQLClient
5from http.client import IncompleteRead
6from datetime import datetime
7 
# ######## CONFIGURATION SECTION START

# Locations query: aliases (rc_*/addr_*) rename the schema fields so they
# line up with the `fields` header list below.
page_query = '''
query extractLocations($after:String, $first:Int) {
  locations (after:$after, first:$first){
    edges {
      node {
        rc_location_id:id
        rc_location_ref:ref
        rc_location_name:name
        primaryAddress{
          addr_name:name
          addr_city:city
          addr_postcode:postcode
          addr_state:state
        }
        openingSchedule{
          rc_schedule_id:id
          rc_allHours:allHours
          rc_monStart:monStart
          rc_monEnd:monEnd
          rc_tueStart:tueStart
          rc_tueEnd:tueEnd
          rc_wedStart:wedStart
          rc_wedEnd:wedEnd
          rc_thuStart:thuStart
          rc_thuEnd:thuEnd
          rc_friStart:friStart
          rc_friEnd:friEnd
          rc_satStart:satStart
          rc_satEnd:satEnd
          rc_sunStart:sunStart
          rc_sunEnd:sunEnd
        }
      }
      cursor
    }
    pageInfo {
      hasNextPage
    }
  }
}
'''

# The {{...}} placeholders must be replaced with real account credentials
# before running the script.
accountId = '{{accountId}}'
oauth_url = '{{fluentApiHost}}/oauth/token?username={{retailerUsername}}&password={{retailerPassword}}&client_id={{accountId}}&client_secret={{clientSecret}}&grant_type=password&scope=api'

# needs to correspond to the GQL query fields.
entity = "locations"
fields = ["rc_location_id","rc_location_ref","rc_location_name",
          "addr_name","addr_city","addr_postcode","addr_state",
          "rc_schedule_id","rc_allHours",
          "rc_monStart","rc_monEnd","rc_tueStart","rc_tueEnd","rc_wedStart","rc_wedEnd",
          "rc_thuStart","rc_thuEnd","rc_friStart","rc_friEnd","rc_satStart","rc_satEnd",
          "rc_sunStart","rc_sunEnd"]
# ######## CONFIGURATION SECTION END

graphql_url = 'https://' + accountId + '.api.fluentretail.com/graphql'
# Output file pattern: yyyymmdd_<accountId>_<entity>.csv
filename = datetime.now().strftime("%Y%m%d") + '_' + accountId + '_' + entity + '.csv'
67 
68 
def get_page_of_data(client, after, first):
    """Execute one page of `page_query` and return the parsed `data` payload."""
    raw_response = client.execute(page_query, {'after': after, 'first': first})
    return json.loads(raw_response)['data']
73 
74 
def get_token():
    """Request an OAuth access token and return it as a Bearer header value.

    Raises:
        RuntimeError: when the token endpoint does not answer 200, instead of
            continuing and failing later with a KeyError on the response body.
    """
    auth_token_response = requests.post(oauth_url)
    if auth_token_response.status_code != 200:
        raise RuntimeError(
            "Couldn't get auth token {}".format(auth_token_response.status_code))
    # NOTE(review): the full response body is no longer printed to avoid
    # leaking credentials into logs.
    access_token = 'Bearer ' + auth_token_response.json()['access_token']
    return access_token
84 
85 
def init_client():
    """Build a GraphQL client authenticated with a freshly obtained token."""
    gql_client = GraphQLClient(graphql_url)
    gql_client.inject_token(get_token())
    return gql_client
90 
91 
def get_all_data(client=None, all_labels=None, cursor=None, first=5000, retries_left=5):
    """Page through the GraphQL connection and return the accumulated edges.

    Args:
        client: GraphQL client; created lazily when None (the old default
            `client=init_client()` ran a network call at import time).
        all_labels: accumulator list; None avoids the shared-mutable-default bug.
        cursor: connection cursor to resume from, None for the first page.
        first: page size requested per call.
        retries_left: remaining retries after transient failures.

    The previous recursive version passed `retries_left` into the `first`
    positional slot on retry (so retries never decremented) and discarded the
    result of a successful retry; the loop below fixes both.
    """
    if client is None:
        client = init_client()
    if all_labels is None:
        all_labels = []
    while True:
        try:
            data = get_page_of_data(client, cursor, first)
        except IncompleteRead:
            # Connection dropped mid-response: re-authenticate, retry the page.
            if retries_left <= 0:
                print('retries exhausted')
                return all_labels
            retries_left -= 1
            print('reconnect and keep tracking')
            client = init_client()
            continue
        except Exception:
            if retries_left <= 0:
                print('retries exhausted')
                return all_labels
            retries_left -= 1
            print("error occurred for cursor: {} retrying with retries left:{}".format(cursor, retries_left))
            continue
        if data is None:
            # GraphQL errors can yield a null `data` payload; stop here.
            return all_labels
        new_labels = data[entity]['edges']
        all_labels.extend(new_labels)
        if not data[entity]['pageInfo']['hasNextPage']:
            return all_labels
        if new_labels:
            cursor = get_cursor(new_labels)
        # else: hasNextPage with an empty page — retry the same cursor,
        # matching the prior behavior.
122 
123 
def get_cursor(new_labels):
    """Return the cursor of the last edge that carries one, or None.

    The previous version walked backwards in a `while True` loop and could
    raise IndexError when no edge had a cursor; scanning in reverse and
    returning None handles that and the empty-list case uniformly.
    """
    for edge in reversed(new_labels):
        if 'cursor' in edge:
            return edge['cursor']
    return None
136 
137 
138def write_to_csv(all_labels):
139    file = csv.writer(open(filename, "wt"), delimiter=',')
140    file.writerow(fields)
141    for data in all_labels:
142        if len(data.keys()) > 0:
143            row = []
144            for field in fields:
145                print("fields: {}".format(field))
146                if field == "rc_schedule_id":
147                    row.append(data['node']['openingSchedule']['rc_schedule_id'])
148                    print("fields rc_schedule_id: {}".format(data['node']['openingSchedule']['rc_schedule_id']))
149                elif field == "rc_allHours":
150                    row.append(data['node']['openingSchedule']['rc_allHours'])
151                    print("fields rc_allHours: {}".format(data['node']['openingSchedule']['rc_allHours']))
152                elif field == "rc_monStart":
153                    row.append(data['node']['openingSchedule']['rc_monStart'])
154                    print("fields rc_monStart: {}".format(data['node']['openingSchedule']['rc_monStart']))
155                elif field == "rc_monEnd":
156                    row.append(data['node']['openingSchedule']['rc_monEnd'])
157                    print("fields rc_monEnd: {}".format(data['node']['openingSchedule']['rc_monEnd']))
158                elif field == "rc_tueStart":
159                    row.append(data['node']['openingSchedule']['rc_tueStart'])
160                    print("fields rc_tueStart: {}".format(data['node']['openingSchedule']['rc_tueStart']))
161                elif field == "rc_tueEnd":
162                    row.append(data['node']['openingSchedule']['rc_tueEnd'])
163                    print("fields rc_tueEnd: {}".format(data['node']['openingSchedule']['rc_tueEnd']))
164                elif field == "rc_wedStart":
165                    row.append(data['node']['openingSchedule']['rc_wedStart'])
166                    print("fields rc_wedStart: {}".format(data['node']['openingSchedule']['rc_wedStart']))
167                elif field == "rc_wedEnd":
168                    row.append(data['node']['openingSchedule']['rc_wedEnd'])
169                    print("fields rc_wedEnd: {}".format(data['node']['openingSchedule']['rc_wedEnd']))
170                elif field == "rc_thuStart":
171                    row.append(data['node']['openingSchedule']['rc_thuStart'])
172                    print("fields rc_thuStart: {}".format(data['node']['openingSchedule']['rc_thuStart']))
173                elif field == "rc_thuEnd":
174                    row.append(data['node']['openingSchedule']['rc_thuEnd'])
175                    print("fields rc_thuEnd: {}".format(data['node']['openingSchedule']['rc_thuEnd']))
176                elif field == "rc_friStart":
177                    row.append(data['node']['openingSchedule']['rc_friStart'])
178                    print("fields rc_friStart: {}".format(data['node']['openingSchedule']['rc_friStart']))
179                elif field == "rc_friEnd":
180                    row.append(data['node']['openingSchedule']['rc_friEnd'])
181                    print("fields rc_friEnd: {}".format(data['node']['openingSchedule']['rc_friEnd']))
182                elif field == "rc_satStart":
183                    row.append(data['node']['openingSchedule']['rc_satStart'])
184                    print("fields rc_satStart: {}".format(data['node']['openingSchedule']['rc_satStart']))
185                elif field == "rc_satEnd":
186                    row.append(data['node']['openingSchedule']['rc_satEnd'])
187                    print("fields rc_satEnd: {}".format(data['node']['openingSchedule']['rc_satEnd']))
188                elif field == "rc_sunStart":
189                    row.append(data['node']['openingSchedule']['rc_sunStart'])
190                    print("fields sunStart: {}".format(data['node']['openingSchedule']['rc_sunStart']))
191                elif field == "rc_sunEnd":
192                    row.append(data['node']['openingSchedule']['rc_sunEnd'])
193                    print("fields rc_sunEnd: {}".format(data['node']['openingSchedule']['rc_sunEnd']))
194                elif field == "addr_name":
195                    #row.append(data['node']['primaryAddress']['addr_name'])
196                    row.append(str(data['node']['primaryAddress']['addr_name']).replace(","," "))
197                    print("fields addr_name: {}".format(data['node']['primaryAddress']['addr_name']))
198                elif field == "addr_city":
199                    row.append(str(data['node']['primaryAddress']['addr_city']).replace(","," "))
200                    print("fields addr_city: {}".format(data['node']['primaryAddress']['addr_city']))
201                elif field == "addr_postcode":
202                    row.append(data['node']['primaryAddress']['addr_postcode'])
203                    print("fields addr_postcode: {}".format(data['node']['primaryAddress']['addr_postcode']))
204                elif field == "addr_state":
205                    row.append(data['node']['primaryAddress']['addr_state'])
206                    print("fields addr_state: {}".format(data['node']['primaryAddress']['addr_state']))
207                else:
208                    row.append(data['node'][field])
209            file.writerow(row)
210        print("fields: {}".format(data))
211 
# Run the extract only when executed as a script, not when imported.
if __name__ == '__main__':
    result = get_all_data()
    write_to_csv(result)

Language: python

Name: Python extract location hours

Description:

[Warning: empty required content area]