Fluent Commerce Logo
Docs
Sign In

Bulk Extracts with GraphQL

How-to Guide

Author:

Movyn John

Changed on:

20 Nov 2023

Key Points

  • A python script can be used for ad-hoc extracts of a large number of GQL entities into a CSV file. 
  • Currently the Fluent platform does not support any bulk extracts. 
  • This script helps to extract data in bulk using the pagination functionality provided by the schema.
  • Depending on the amount of data that is extracted the script might run for a while.
  • The above code is provided as-is and is for reference purposes only. 

Steps

Step arrow right iconPre-Requisites

  • Python3 (brew install python3)
  • IDE (e.g. PyCharm)
  • Some developer/python experience

Step arrow right iconAfter setting up Python3 and a Python IDE, import the Python script below

1import json
2import csv
3import requests
4from graphqlclient import GraphQLClient
5from http.client import IncompleteRead
6from datetime import datetime
7 
# ######## CONFIGURATION SECTION START
# Exactly one page_query / entity / fields triplet must be active at a time;
# the alternatives below are kept commented out, ready to swap in.
page_query = '''
query extractInventoryPositions($after:String, $first:Int) {
  inventoryPositions(after:$after, first:$first){
    edges {
      node {
        ref
        status
        productRef
        locationRef
        onHand
        updatedOn
        createdOn
      }
      cursor
    }
    pageInfo {
      hasNextPage
    }
  }
}
'''

# page_query = '''
# query extractVariantProducts($after:String, $first:Int) {
#   variantProducts (after:$after, first:$first){
#     edges {
#       node {
#         ref
#         status
#         type
#         name
#         gtin
#         summary
#         updatedOn
#         createdOn
#       }
#       cursor
#     }
#     pageInfo {
#       hasNextPage
#     }
#   }
# }
# '''

# page_query = '''
# query extractVirtualPositions($after: String, $first: Int) {
#   virtualPositions(after: $after, first: $first) {
#     edges {
#       node {
#         ref
#         type
#         status
#         quantity
#         productRef
#         groupRef
#         updatedOn
#         createdOn
#       }
#       cursor
#     }
#     pageInfo {
#       hasNextPage
#     }
#   }
# }
# '''

# The {{...}} placeholders must be replaced with real account credentials
# before running the script.
accountId = '{{accountId}}'
oauth_url = '{{fluentApiHost}}/oauth/token?username={{retailerUsername}}&password={{retailerPassword}}&client_id={{accountId}}&client_secret={{clientSecret}}&grant_type=password&scope=api'

# needs to correspond to the GQL query fields.
entity = "inventoryPositions"
fields = ["ref", "status", "productRef", "locationRef", "onHand", "updatedOn", "createdOn"]
# entity = "variantProducts"
# fields = ["ref", "status", "type", "name", "gtin", "summary", "updatedOn", "createdOn"]
# entity = "virtualPositions"
# fields = ["ref", "type", "status", "quantity", "productRef", "groupRef", "updatedOn", "createdOn"]
# ######## CONFIGURATION SECTION END

graphql_url = 'https://' + accountId + '.api.fluentretail.com/graphql'
# Output file pattern: yyyymmdd_<accountId>_<entity>.csv
filename = datetime.now().strftime("%Y%m%d") + '_' + accountId + '_' + entity + '.csv'
90 
91 
def get_page_of_data(client, after, first):
    """Execute one page of `page_query` and return the parsed `data` payload."""
    raw_response = client.execute(page_query, {'after': after, 'first': first})
    return json.loads(raw_response)['data']
96 
97 
def get_token():
    """Request an OAuth access token and return it as a Bearer header value.

    Raises:
        RuntimeError: when the token endpoint does not answer 200, instead of
            continuing and failing later with a KeyError on the response body.
    """
    auth_token_response = requests.post(oauth_url)
    if auth_token_response.status_code != 200:
        raise RuntimeError(
            "Couldn't get auth token {}".format(auth_token_response.status_code))
    # NOTE(review): the full response body is no longer printed to avoid
    # leaking credentials into logs.
    access_token = 'Bearer ' + auth_token_response.json()['access_token']
    return access_token
107 
108 
def init_client():
    """Build a GraphQL client authenticated with a freshly obtained token."""
    gql_client = GraphQLClient(graphql_url)
    gql_client.inject_token(get_token())
    return gql_client
113 
114 
def get_all_data(client=None, all_labels=None, cursor=None, first=5000, retries_left=5):
    """Page through the GraphQL connection and return the accumulated edges.

    Args:
        client: GraphQL client; created lazily when None (the old default
            `client=init_client()` ran a network call at import time).
        all_labels: accumulator list; None avoids the shared-mutable-default bug.
        cursor: connection cursor to resume from, None for the first page.
        first: page size requested per call.
        retries_left: remaining retries after transient failures.

    The previous recursive version passed `retries_left` into the `first`
    positional slot on retry (so retries never decremented) and discarded the
    result of a successful retry; the loop below fixes both.
    """
    if client is None:
        client = init_client()
    if all_labels is None:
        all_labels = []
    while True:
        try:
            data = get_page_of_data(client, cursor, first)
        except IncompleteRead:
            # Connection dropped mid-response: re-authenticate, retry the page.
            if retries_left <= 0:
                print('retries exhausted')
                return all_labels
            retries_left -= 1
            print('reconnect and keep tracking')
            client = init_client()
            continue
        except Exception:
            if retries_left <= 0:
                print('retries exhausted')
                return all_labels
            retries_left -= 1
            print("error occurred for cursor: {} retrying with retries left:{}".format(cursor, retries_left))
            continue
        if data is None:
            # GraphQL errors can yield a null `data` payload; stop here.
            return all_labels
        new_labels = data[entity]['edges']
        all_labels.extend(new_labels)
        if not data[entity]['pageInfo']['hasNextPage']:
            return all_labels
        if new_labels:
            cursor = get_cursor(new_labels)
        # else: hasNextPage with an empty page — retry the same cursor,
        # matching the prior behavior.
145 
146 
def get_cursor(new_labels):
    """Return the cursor of the last edge that carries one, or None.

    The previous version walked backwards in a `while True` loop and could
    raise IndexError when no edge had a cursor; scanning in reverse and
    returning None handles that and the empty-list case uniformly.
    """
    for edge in reversed(new_labels):
        if 'cursor' in edge:
            return edge['cursor']
    return None
159 
160 
def write_to_csv(all_labels):
    """Write a header row then one row per edge to `filename`.

    Uses a context manager so the file handle is flushed and closed (the old
    version leaked it), and `newline=''` as the csv module requires.
    """
    with open(filename, "w", newline='') as out_file:
        writer = csv.writer(out_file, delimiter=',')
        writer.writerow(fields)
        for edge in all_labels:
            if edge:  # skip empty dicts, as before
                writer.writerow([edge['node'][field] for field in fields])
170 
171 
# Run the extract only when executed as a script, not when imported.
if __name__ == '__main__':
    result = get_all_data()
    write_to_csv(result)

Language: python

Name: Python code data extract

Description:

[Warning: empty required content area]

Step arrow right iconExtract various positions

The script is ready to extract 3 different entity types and the corresponding sections need to be commented out/in (other entities can be added to the script):

  • InventoryPositions: lines 10-29 & 83-84
  • VariantProducts: lines 31-51 & 85-86
  • VirtualPositions: lines 54-75 & 87-88

Step arrow right iconresult

After executing successfully a csv file is produced, which will follow the filename pattern

`yyyymmdd_{{accountId}}_{{entityType}}.csv`
e.g.
`20191122_FLUENT_virtualPositions.csv`

Step arrow right iconBulk extract on Store Location Hours

Another example script to extract Store Location Hours:


1import json
2import csv
3import requests
4from graphqlclient import GraphQLClient
5from http.client import IncompleteRead
6from datetime import datetime
7 
# ######## CONFIGURATION SECTION START

# Locations query: aliases (rc_*/addr_*) rename the schema fields so they
# line up with the `fields` header list below.
page_query = '''
query extractLocations($after:String, $first:Int) {
  locations (after:$after, first:$first){
    edges {
      node {
        rc_location_id:id
        rc_location_ref:ref
        rc_location_name:name
        primaryAddress{
          addr_name:name
          addr_city:city
          addr_postcode:postcode
          addr_state:state
        }
        openingSchedule{
          rc_schedule_id:id
          rc_allHours:allHours
          rc_monStart:monStart
          rc_monEnd:monEnd
          rc_tueStart:tueStart
          rc_tueEnd:tueEnd
          rc_wedStart:wedStart
          rc_wedEnd:wedEnd
          rc_thuStart:thuStart
          rc_thuEnd:thuEnd
          rc_friStart:friStart
          rc_friEnd:friEnd
          rc_satStart:satStart
          rc_satEnd:satEnd
          rc_sunStart:sunStart
          rc_sunEnd:sunEnd
        }
      }
      cursor
    }
    pageInfo {
      hasNextPage
    }
  }
}
'''

# The {{...}} placeholders must be replaced with real account credentials
# before running the script.
accountId = '{{accountId}}'
oauth_url = '{{fluentApiHost}}/oauth/token?username={{retailerUsername}}&password={{retailerPassword}}&client_id={{accountId}}&client_secret={{clientSecret}}&grant_type=password&scope=api'

# needs to correspond to the GQL query fields.
entity = "locations"
fields = ["rc_location_id","rc_location_ref","rc_location_name",
          "addr_name","addr_city","addr_postcode","addr_state",
          "rc_schedule_id","rc_allHours",
          "rc_monStart","rc_monEnd","rc_tueStart","rc_tueEnd","rc_wedStart","rc_wedEnd",
          "rc_thuStart","rc_thuEnd","rc_friStart","rc_friEnd","rc_satStart","rc_satEnd",
          "rc_sunStart","rc_sunEnd"]
# ######## CONFIGURATION SECTION END

graphql_url = 'https://' + accountId + '.api.fluentretail.com/graphql'
# Output file pattern: yyyymmdd_<accountId>_<entity>.csv
filename = datetime.now().strftime("%Y%m%d") + '_' + accountId + '_' + entity + '.csv'
67 
68 
def get_page_of_data(client, after, first):
    """Execute one page of `page_query` and return the parsed `data` payload."""
    raw_response = client.execute(page_query, {'after': after, 'first': first})
    return json.loads(raw_response)['data']
73 
74 
def get_token():
    """Request an OAuth access token and return it as a Bearer header value.

    Raises:
        RuntimeError: when the token endpoint does not answer 200, instead of
            continuing and failing later with a KeyError on the response body.
    """
    auth_token_response = requests.post(oauth_url)
    if auth_token_response.status_code != 200:
        raise RuntimeError(
            "Couldn't get auth token {}".format(auth_token_response.status_code))
    # NOTE(review): the full response body is no longer printed to avoid
    # leaking credentials into logs.
    access_token = 'Bearer ' + auth_token_response.json()['access_token']
    return access_token
84 
85 
def init_client():
    """Build a GraphQL client authenticated with a freshly obtained token."""
    gql_client = GraphQLClient(graphql_url)
    gql_client.inject_token(get_token())
    return gql_client
90 
91 
def get_all_data(client=None, all_labels=None, cursor=None, first=5000, retries_left=5):
    """Page through the GraphQL connection and return the accumulated edges.

    Args:
        client: GraphQL client; created lazily when None (the old default
            `client=init_client()` ran a network call at import time).
        all_labels: accumulator list; None avoids the shared-mutable-default bug.
        cursor: connection cursor to resume from, None for the first page.
        first: page size requested per call.
        retries_left: remaining retries after transient failures.

    The previous recursive version passed `retries_left` into the `first`
    positional slot on retry (so retries never decremented) and discarded the
    result of a successful retry; the loop below fixes both.
    """
    if client is None:
        client = init_client()
    if all_labels is None:
        all_labels = []
    while True:
        try:
            data = get_page_of_data(client, cursor, first)
        except IncompleteRead:
            # Connection dropped mid-response: re-authenticate, retry the page.
            if retries_left <= 0:
                print('retries exhausted')
                return all_labels
            retries_left -= 1
            print('reconnect and keep tracking')
            client = init_client()
            continue
        except Exception:
            if retries_left <= 0:
                print('retries exhausted')
                return all_labels
            retries_left -= 1
            print("error occurred for cursor: {} retrying with retries left:{}".format(cursor, retries_left))
            continue
        if data is None:
            # GraphQL errors can yield a null `data` payload; stop here.
            return all_labels
        new_labels = data[entity]['edges']
        all_labels.extend(new_labels)
        if not data[entity]['pageInfo']['hasNextPage']:
            return all_labels
        if new_labels:
            cursor = get_cursor(new_labels)
        # else: hasNextPage with an empty page — retry the same cursor,
        # matching the prior behavior.
122 
123 
def get_cursor(new_labels):
    """Return the cursor of the last edge that carries one, or None.

    The previous version walked backwards in a `while True` loop and could
    raise IndexError when no edge had a cursor; scanning in reverse and
    returning None handles that and the empty-list case uniformly.
    """
    for edge in reversed(new_labels):
        if 'cursor' in edge:
            return edge['cursor']
    return None
136 
137 
138def write_to_csv(all_labels):
139    file = csv.writer(open(filename, "wt"), delimiter=',')
140    file.writerow(fields)
141    for data in all_labels:
142        if len(data.keys()) > 0:
143            row = []
144            for field in fields:
145                print("fields: {}".format(field))
146                if field == "rc_schedule_id":
147                    row.append(data['node']['openingSchedule']['rc_schedule_id'])
148                    print("fields rc_schedule_id: {}".format(data['node']['openingSchedule']['rc_schedule_id']))
149                elif field == "rc_allHours":
150                    row.append(data['node']['openingSchedule']['rc_allHours'])
151                    print("fields rc_allHours: {}".format(data['node']['openingSchedule']['rc_allHours']))
152                elif field == "rc_monStart":
153                    row.append(data['node']['openingSchedule']['rc_monStart'])
154                    print("fields rc_monStart: {}".format(data['node']['openingSchedule']['rc_monStart']))
155                elif field == "rc_monEnd":
156                    row.append(data['node']['openingSchedule']['rc_monEnd'])
157                    print("fields rc_monEnd: {}".format(data['node']['openingSchedule']['rc_monEnd']))
158                elif field == "rc_tueStart":
159                    row.append(data['node']['openingSchedule']['rc_tueStart'])
160                    print("fields rc_tueStart: {}".format(data['node']['openingSchedule']['rc_tueStart']))
161                elif field == "rc_tueEnd":
162                    row.append(data['node']['openingSchedule']['rc_tueEnd'])
163                    print("fields rc_tueEnd: {}".format(data['node']['openingSchedule']['rc_tueEnd']))
164                elif field == "rc_wedStart":
165                    row.append(data['node']['openingSchedule']['rc_wedStart'])
166                    print("fields rc_wedStart: {}".format(data['node']['openingSchedule']['rc_wedStart']))
167                elif field == "rc_wedEnd":
168                    row.append(data['node']['openingSchedule']['rc_wedEnd'])
169                    print("fields rc_wedEnd: {}".format(data['node']['openingSchedule']['rc_wedEnd']))
170                elif field == "rc_thuStart":
171                    row.append(data['node']['openingSchedule']['rc_thuStart'])
172                    print("fields rc_thuStart: {}".format(data['node']['openingSchedule']['rc_thuStart']))
173                elif field == "rc_thuEnd":
174                    row.append(data['node']['openingSchedule']['rc_thuEnd'])
175                    print("fields rc_thuEnd: {}".format(data['node']['openingSchedule']['rc_thuEnd']))
176                elif field == "rc_friStart":
177                    row.append(data['node']['openingSchedule']['rc_friStart'])
178                    print("fields rc_friStart: {}".format(data['node']['openingSchedule']['rc_friStart']))
179                elif field == "rc_friEnd":
180                    row.append(data['node']['openingSchedule']['rc_friEnd'])
181                    print("fields rc_friEnd: {}".format(data['node']['openingSchedule']['rc_friEnd']))
182                elif field == "rc_satStart":
183                    row.append(data['node']['openingSchedule']['rc_satStart'])
184                    print("fields rc_satStart: {}".format(data['node']['openingSchedule']['rc_satStart']))
185                elif field == "rc_satEnd":
186                    row.append(data['node']['openingSchedule']['rc_satEnd'])
187                    print("fields rc_satEnd: {}".format(data['node']['openingSchedule']['rc_satEnd']))
188                elif field == "rc_sunStart":
189                    row.append(data['node']['openingSchedule']['rc_sunStart'])
190                    print("fields sunStart: {}".format(data['node']['openingSchedule']['rc_sunStart']))
191                elif field == "rc_sunEnd":
192                    row.append(data['node']['openingSchedule']['rc_sunEnd'])
193                    print("fields rc_sunEnd: {}".format(data['node']['openingSchedule']['rc_sunEnd']))
194                elif field == "addr_name":
195                    #row.append(data['node']['primaryAddress']['addr_name'])
196                    row.append(str(data['node']['primaryAddress']['addr_name']).replace(","," "))
197                    print("fields addr_name: {}".format(data['node']['primaryAddress']['addr_name']))
198                elif field == "addr_city":
199                    row.append(str(data['node']['primaryAddress']['addr_city']).replace(","," "))
200                    print("fields addr_city: {}".format(data['node']['primaryAddress']['addr_city']))
201                elif field == "addr_postcode":
202                    row.append(data['node']['primaryAddress']['addr_postcode'])
203                    print("fields addr_postcode: {}".format(data['node']['primaryAddress']['addr_postcode']))
204                elif field == "addr_state":
205                    row.append(data['node']['primaryAddress']['addr_state'])
206                    print("fields addr_state: {}".format(data['node']['primaryAddress']['addr_state']))
207                else:
208                    row.append(data['node'][field])
209            file.writerow(row)
210        print("fields: {}".format(data))
211 
# Run the extract only when executed as a script, not when imported.
if __name__ == '__main__':
    result = get_all_data()
    write_to_csv(result)

Language: python

Name: Python extract location hours

Description:

[Warning: empty required content area]