Bulk Extracts with GraphQL
Author:
Movyn John
Changed on:
20 Nov 2023
Key Points
- A Python script can be used for ad-hoc extracts of a large number of GraphQL entities into a CSV file.
- Currently, the Fluent platform does not support bulk extracts out of the box.
- This script extracts data in bulk using the cursor-based pagination functionality provided by the schema (see the sketch after this list).
- Depending on the amount of data extracted, the script might run for a while.
- The code below is provided as-is, for reference purposes only.
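At its core, the extract fetches one page of `first` edges at a time and resumes from the cursor of the last edge until `pageInfo.hasNextPage` is false. The following is a minimal iterative sketch of that loop, assuming an already authenticated `graphqlclient.GraphQLClient` and a connection query taking $after/$first like the ones in the full script below; the function and parameter names are illustrative:
import json


def fetch_all_edges(client, query, entity, page_size=500):
    # Collect every edge of a paginated GraphQL connection.
    edges, cursor = [], None
    while True:
        res = client.execute(query, {'after': cursor, 'first': page_size})
        connection = json.loads(res)['data'][entity]
        page = connection['edges']
        edges.extend(page)
        if not connection['pageInfo']['hasNextPage'] or not page:
            return edges
        # Resume after the last edge seen on this page.
        cursor = page[-1]['cursor']
Language: python
Name: Cursor pagination sketch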
Steps
Pre-Requisites
- Python3 (brew install python3)
- The `requests` and `graphqlclient` packages (pip3 install requests graphqlclient)
- IDE (e.g. PyCharm)
- Some developer/Python experience
After setting up Python3, a Python IDE, and the required packages, import the Python script below:
1import json
2import csv
3import requests
4from graphqlclient import GraphQLClient
5from http.client import IncompleteRead
6from datetime import datetime
7
8# ######## CONFIGURATION SECTION START
9page_query = '''
10query extractInventoryPositions($after:String, $first:Int) {
11 inventoryPositions(after:$after, first:$first){
12 edges {
13 node {
14 ref
15 status
16 productRef
17 locationRef
18 onHand
19 updatedOn
20 createdOn
21 }
22 cursor
23 }
24 pageInfo {
25 hasNextPage
26 }
27 }
28}
29'''
30
31# page_query = '''
32# query extractVariantProducts($after:String, $first:Int) {
33# variantProducts (after:$after, first:$first){
34# edges {
35# node {
36# ref
37# status
38# type
39# name
40# gtin
41# summary
42# updatedOn
43# createdOn
44# }
45# cursor
46# }
47# pageInfo {
48# hasNextPage
49# }
50# }
51# }
52# '''
53
54# page_query = '''
55# query extractVirtualPositions($after: String, $first: Int) {
56# virtualPositions(after: $after, first: $first) {
57# edges {
58# node {
59# ref
60# type
61# status
62# quantity
63# productRef
64# groupRef
65# updatedOn
66# createdOn
67# }
68# cursor
69# }
70# pageInfo {
71# hasNextPage
72# }
73# }
74# }
75# '''
76accountId = '{{accountId}}'
77oauth_url = '{{fluentApiHost}}/oauth/token?username={{retailerUsername}}&password={{retailerPassword}}&client_id={{accountId}}&client_secret={{clientSecret}}&grant_type=password&scope=api'
78
79# needs to correspond to the GQL query fields.
80entity = "inventoryPositions"
81fields = ["ref", "status", "productRef", "locationRef", "onHand", "updatedOn", "createdOn"]
82# entity = "variantProducts"
83# fields = ["ref", "status", "type", "name", "gtin", "summary", "updatedOn", "createdOn"]
84# entity = "virtualPositions"
85# fields = ["ref", "type", "status", "quantity", "productRef", "groupRef", "updatedOn", "createdOn"]
86# ######## CONFIGURATION SECTION END
87
88graphql_url = 'https://' + accountId + '.api.fluentretail.com/graphql'
89filename = datetime.now().strftime("%Y%m%d") + '_' + accountId + '_' + entity + '.csv'
90
91
92def get_page_of_data(client, after, first):
93 res = client.execute(page_query, {'after': after, 'first': first})
94 data = json.loads(res)['data']
95 return data
96
97
98def get_token():
99    # Exchange the configured credentials for an OAuth bearer token.
100    auth_token_response = requests.post(oauth_url)
101    if auth_token_response.status_code != 200:
102        # Fail fast rather than indexing into an error response below.
103        raise RuntimeError("Couldn't get auth token {}".format(auth_token_response.status_code))
104    access_token = 'Bearer ' + auth_token_response.json()['access_token']
105    print("Access token: {}".format(access_token))
106    return access_token
107
108
109def init_client():
110 client = GraphQLClient(graphql_url)
111 client.inject_token(get_token())
112 return client
113
114
115def get_all_data(client=None, all_labels=None, cursor=None, first=5000, retries_left=5):
116    # Page through the connection, accumulating edges until hasNextPage is false.
117    client = client or init_client()
118    all_labels = all_labels if all_labels is not None else []
119    data = None
120    try:
121        data = get_page_of_data(client, cursor, first)
122    except IncompleteRead:
123        print('connection dropped, reconnecting')
124        if retries_left > 0:
125            # Re-authenticate and retry the same page.
126            return get_all_data(init_client(), all_labels, cursor, first, retries_left - 1)
127        print('retries exhausted')
128        return all_labels
129    except Exception:
130        print("error occurred for cursor: {}, retries left: {}".format(cursor, retries_left))
131        if retries_left > 0:
132            # Retry the same page with the existing client.
133            return get_all_data(client, all_labels, cursor, first, retries_left - 1)
134        print('retries exhausted')
135        return all_labels
136    if data is not None:
137        new_labels = data[entity]['edges']
138        has_next_page = data[entity]['pageInfo']['hasNextPage']
139        all_labels = all_labels + new_labels
140        if has_next_page and len(new_labels) > 0:
141            # Continue from the cursor of the last edge on this page.
142            cursor = get_cursor(new_labels)
143            all_labels = get_all_data(client, all_labels, cursor, first)
144    return all_labels
145
146
147def get_cursor(new_labels):
148    # Walk backwards through the page to find the last edge that
149    # carries a cursor. Because the query selects `cursor` on every
150    # edge, in practice this returns the cursor of the final edge.
151    last = len(new_labels) - 1
152    while last >= 0:
153        if 'cursor' in new_labels[last]:
154            return new_labels[last]['cursor']
155        last -= 1
156    # Defensive fallback; not expected to happen, since every edge
157    # in the query selects `cursor`.
158    return None
159
160
161def write_to_csv(all_labels):
162    # Write a header row followed by one row per extracted node.
163    with open(filename, "w", newline='') as out:
164        writer = csv.writer(out, delimiter=',')
165        writer.writerow(fields)
166        for edge in all_labels:
167            if edge.get('node'):
168                row = [edge['node'][field] for field in fields]
169                writer.writerow(row)
170
171
172result = get_all_data()
173write_to_csv(result)
Language: python
Name: Python code data extract
Extract various positions
The script is ready to extract three different entity types; the corresponding sections need to be commented in/out, as shown in the example after this list (other entities can be added to the script):
- InventoryPositions: lines 9-29 & 80-81
- VariantProducts: lines 31-52 & 82-83
- VirtualPositions: lines 54-75 & 84-85
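For example, after switching to VariantProducts (commenting lines 9-29 and 80-81 out, and lines 31-52 and 82-83 in), the active part of the configuration section looks like this:
page_query = '''
query extractVariantProducts($after:String, $first:Int) {
  variantProducts (after:$after, first:$first){
    edges {
      node {
        ref
        status
        type
        name
        gtin
        summary
        updatedOn
        createdOn
      }
      cursor
    }
    pageInfo {
      hasNextPage
    }
  }
}
'''
entity = "variantProducts"
fields = ["ref", "status", "type", "name", "gtin", "summary", "updatedOn", "createdOn"]
Language: python
Name: Configuration switched to VariantProducts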
Result
After executing successfully, a CSV file is produced that follows the filename pattern
`yyyymmdd_{{accountId}}_{{entityType}}.csv`
e.g. `20191122_FLUENT_virtualPositions.csv`
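Assuming the script was saved as, for instance, bulk_extract.py (name illustrative), it can be run from a terminal with `python3 bulk_extract.py`; the file is written to the current working directory.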
Bulk extract of Store Location Hours
Another example, extracting store location hours; note the aliased fields and the nested primaryAddress and openingSchedule objects:
import json
import csv
import requests
from graphqlclient import GraphQLClient
from http.client import IncompleteRead
from datetime import datetime

# ######## CONFIGURATION SECTION START

page_query = '''
query extractLocations($after:String, $first:Int) {
  locations (after:$after, first:$first){
    edges {
      node {
        rc_location_id:id
        rc_location_ref:ref
        rc_location_name:name
        primaryAddress{
          addr_name:name
          addr_city:city
          addr_postcode:postcode
          addr_state:state
        }
        openingSchedule{
          rc_schedule_id:id
          rc_allHours:allHours
          rc_monStart:monStart
          rc_monEnd:monEnd
          rc_tueStart:tueStart
          rc_tueEnd:tueEnd
          rc_wedStart:wedStart
          rc_wedEnd:wedEnd
          rc_thuStart:thuStart
          rc_thuEnd:thuEnd
          rc_friStart:friStart
          rc_friEnd:friEnd
          rc_satStart:satStart
          rc_satEnd:satEnd
          rc_sunStart:sunStart
          rc_sunEnd:sunEnd
        }
      }
      cursor
    }
    pageInfo {
      hasNextPage
    }
  }
}
'''

accountId = '{{accountId}}'
oauth_url = '{{fluentApiHost}}/oauth/token?username={{retailerUsername}}&password={{retailerPassword}}&client_id={{accountId}}&client_secret={{clientSecret}}&grant_type=password&scope=api'

# needs to correspond to the GQL query fields.
entity = "locations"
fields = ["rc_location_id", "rc_location_ref", "rc_location_name",
          "addr_name", "addr_city", "addr_postcode", "addr_state",
          "rc_schedule_id", "rc_allHours",
          "rc_monStart", "rc_monEnd", "rc_tueStart", "rc_tueEnd", "rc_wedStart", "rc_wedEnd",
          "rc_thuStart", "rc_thuEnd", "rc_friStart", "rc_friEnd", "rc_satStart", "rc_satEnd",
          "rc_sunStart", "rc_sunEnd"]
# ######## CONFIGURATION SECTION END

graphql_url = 'https://' + accountId + '.api.fluentretail.com/graphql'
filename = datetime.now().strftime("%Y%m%d") + '_' + accountId + '_' + entity + '.csv'

def get_page_of_data(client, after, first):
    # Fetch a single page of `first` edges starting after the given cursor.
    res = client.execute(page_query, {'after': after, 'first': first})
    data = json.loads(res)['data']
    return data


def get_token():
    # Exchange the configured credentials for an OAuth bearer token.
    auth_token_response = requests.post(oauth_url)
    if auth_token_response.status_code != 200:
        # Fail fast rather than indexing into an error response below.
        raise RuntimeError("Couldn't get auth token {}".format(auth_token_response.status_code))
    access_token = 'Bearer ' + auth_token_response.json()['access_token']
    print("Access token: {}".format(access_token))
    return access_token


def init_client():
    client = GraphQLClient(graphql_url)
    client.inject_token(get_token())
    return client


def get_all_data(client=None, all_labels=None, cursor=None, first=5000, retries_left=5):
    # Page through the connection, accumulating edges until hasNextPage is false.
    client = client or init_client()
    all_labels = all_labels if all_labels is not None else []
    data = None
    try:
        data = get_page_of_data(client, cursor, first)
    except IncompleteRead:
        print('connection dropped, reconnecting')
        if retries_left > 0:
            # Re-authenticate and retry the same page.
            return get_all_data(init_client(), all_labels, cursor, first, retries_left - 1)
        print('retries exhausted')
        return all_labels
    except Exception:
        print("error occurred for cursor: {}, retries left: {}".format(cursor, retries_left))
        if retries_left > 0:
            # Retry the same page with the existing client.
            return get_all_data(client, all_labels, cursor, first, retries_left - 1)
        print('retries exhausted')
        return all_labels
    if data is not None:
        new_labels = data[entity]['edges']
        has_next_page = data[entity]['pageInfo']['hasNextPage']
        all_labels = all_labels + new_labels
        if has_next_page and len(new_labels) > 0:
            # Continue from the cursor of the last edge on this page.
            cursor = get_cursor(new_labels)
            all_labels = get_all_data(client, all_labels, cursor, first)
    return all_labels


def get_cursor(new_labels):
    # Walk backwards through the page to the last edge that carries a cursor;
    # every edge in the query selects `cursor`, so this is normally the final edge.
    last = len(new_labels) - 1
    while last >= 0:
        if 'cursor' in new_labels[last]:
            return new_labels[last]['cursor']
        last -= 1
    return None

def write_to_csv(all_labels):
    # Write a header row followed by one row per location. Schedule and
    # address values are nested objects in the response, so they are looked
    # up in their sub-dictionaries; commas are stripped from address values
    # so that they do not break the CSV columns.
    with open(filename, "w", newline='') as out:
        writer = csv.writer(out, delimiter=',')
        writer.writerow(fields)
        for edge in all_labels:
            node = edge.get('node')
            if not node:
                continue
            schedule = node.get('openingSchedule') or {}
            address = node.get('primaryAddress') or {}
            row = []
            for field in fields:
                if field.startswith('addr_'):
                    row.append(str(address.get(field, '')).replace(',', ' '))
                elif field in node:
                    row.append(node[field])
                else:
                    # Remaining rc_* fields come from the opening schedule.
                    row.append(schedule.get(field))
            writer.writerow(row)


result = get_all_data()
write_to_csv(result)
Language: python
Name: Python extract location hours
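Both scripts share the same authentication and pagination helpers; only `page_query`, `entity`, `fields`, and `write_to_csv` differ. To extract a further entity, it should therefore be enough to adjust the configuration section and, where the query selects nested objects (as with `primaryAddress` and `openingSchedule` here), the CSV writing logic.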