Revision: 72840
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at February 15, 2018 10:38 by martinson
Initial Code
---
config:  # digger-wide settings
  agent: Firefox
  debug: 2
do:  # command pipeline, executed top to bottom
- variable_set:  # Instagram login used to authenticate
    field: username
    value: YOU_ACCOUNT_USERNAME_HERE
- variable_set:  # Instagram password used to authenticate
    field: password
    value: YOU_ACCOUNT_PASSWORD_HERE
- variable_set:  # profiles to extract, as one comma-separated string
    field: accounts
    value: LIST OF USERNAMES YOU WANT TO EXTRACT, COMMA SEPARATED

# Comments above this point are inline only, so the line numbers quoted in the
# published instructions (8, 11 and 14) still point at the three values to edit.
#
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# all indentation. Command order and every value are unchanged, but confirm
# the placement of `headers`/`data` under the POST `to:`, of the cookie/split
# steps inside the login walk, and of the final `sleep` against a known-good
# copy of the digger before running it.

# Step 1: open the home page and read the CSRF token from the JSON assigned to
# window._sharedData.
- walk:
    to: https://www.instagram.com/
    do:
    - find:
        path: body
        do:
        # Keep only the value captured by the regex group, convert that JSON
        # to XML (json2xml) and make it the current block so it can be
        # queried with `find` paths.
        - parse:
            filter: window\._sharedData\s+\=\s+([^;]+);
        - normalize:
            routine: json2xml
        - to_block
        - find:
            path: config>csrf_token
            do:
            - parse
            - variable_set: token
            # Step 2: POST the credentials to the AJAX login endpoint, sending
            # the CSRF token back in the x-csrftoken header.
            - walk:
                to:
                  post: https://www.instagram.com/accounts/login/ajax/
                  headers:
                    x-csrftoken: <%token%>
                    # Quoted so the header value stays the literal string.
                    x-instagram-ajax: '1'
                    x-requested-with: XMLHttpRequest
                  data:
                    username: <%username%>
                    password: <%password%>
                do:
                # Stop when the response status is "fail".
                - find:
                    path: status
                    do:
                    - parse
                    - if:
                        match: 'fail'
                        do:
                        # NOTE(review): the next entry is not a parameterised
                        # command; presumably it is a label that names the
                        # failure in the run log -- confirm.
                        - cannot_login_probably_checkpoint_is_required
                        - exit
                # Stop unless the response reports authenticated == "true".
                - find:
                    path: authenticated
                    do:
                    - parse
                    - if:
                        match: 'true'
                        else:
                        # NOTE(review): label entry, same assumption as above.
                        - wrong_login_or_password
                        - exit
                # Step 3: copy the session cookies into variables; they are
                # replayed in the Cookie header of the API request below.
                - cookie_get: mid
                - variable_set: mid
                - cookie_get: rur
                - variable_set: rur
                - cookie_get: ds_user_id
                - variable_set: dsuserid
                - cookie_get: sessionid
                - variable_set: sessionid
                # Step 4: load the account list, split it on commas and visit
                # each piece (selected as div.splitted).
                - variable_get: accounts
                - to_block
                - split:
                    context: text
                    delimiter: ','
                - find:
                    path: div.splitted
                    do:
                    - parse
                    - space_dedupe
                    - trim
                    - variable_set: account
                    # Step 5: look up the user id for this account name.
                    - walk:
                        to: https://www.instagram.com/<%account%>/?__a=1
                        do:
                        - find:
                            path: graphql > user > id
                            do:
                            - parse
                            - variable_set: id
                            # Step 6: request the profile from the API host,
                            # authenticated with the cookies saved in step 3.
                            - walk:
                                to: https://i.instagram.com/api/v1/users/<%id%>/info/
                                headers:
                                  # Numeric-looking values are quoted so YAML
                                  # does not retype them (e.g. -1.000 would
                                  # otherwise load as the float -1.0).
                                  X-IG-App-ID: '567067343352427'
                                  X-IG-Capabilities: 3brDAw==
                                  X-IG-Connection-Type: WIFI
                                  X-IG-Connection-Speed: '3400'
                                  X-IG-Bandwidth-Speed-KBPS: '-1.000'
                                  X-IG-Bandwidth-TotalBytes-B: '0'
                                  X-IG-Bandwidth-TotalTime-MS: '0'
                                  # Folded scalar: the two lines join with a
                                  # single space into one header value.
                                  Cookie: >-
                                    mid=<%mid%>; csrftoken=<%token%>; rur=<%rur%>;
                                    ds_user_id=<%dsuserid%>; sessionid=<%sessionid%>; ig_or=;
                                  X-FB-HTTP-Engine: Liger
                                  Accept: '*/*'
                                  Accept-Language: en-US
                                do:
                                # Step 7: build one `item` object per profile.
                                # Every field is parsed, whitespace-normalised
                                # and stored under the same name, except `pk`,
                                # which is stored as `id`.
                                - find:
                                    path: body_safe > user
                                    do:
                                    - object_new: item
                                    - find:
                                        path: address_street
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: address_street
                                    - find:
                                        path: category
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: category
                                    - find:
                                        path: city_name
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: city_name
                                    - find:
                                        path: contact_phone_number
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: contact_phone_number
                                    - find:
                                        path: external_url
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: external_url
                                    - find:
                                        path: full_name
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: full_name
                                    - find:
                                        path: is_business
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: is_business
                                    - find:
                                        path: latitude
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: latitude
                                    - find:
                                        path: longitude
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: longitude
                                    # Source field `pk` is saved as `id`.
                                    - find:
                                        path: pk
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: id
                                    - find:
                                        path: public_email
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: public_email
                                    - find:
                                        path: public_phone_country_code
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: public_phone_country_code
                                    - find:
                                        path: public_phone_number
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: public_phone_number
                                    - find:
                                        path: username
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: username
                                    - find:
                                        path: zip
                                        do:
                                        - parse
                                        - space_dedupe
                                        - trim
                                        - object_field_set:
                                            object: item
                                            field: zip
                                    # Emit the record, then pause before the
                                    # next account (presumably 5 seconds --
                                    # confirm the unit).
                                    - object_save:
                                        name: item
                                    - sleep: 5
Initial URL
https://www.diggernaut.com
Initial Description
To use this free scraper for Instagram business profiles, you need an account at a well-known [web scraping service](https://www.diggernaut.com/). This scraper allows you to scrape contact details from business profiles and also indicates whether a profile is a business profile or not. The scraper uses the mobile API, so you will need to provide an Instagram login and password. **MAKE SURE YOU DON'T USE YOUR MAIN ACCOUNT**. This API usage is unofficial, and you use it at your own risk. To use it, log in to your Diggernaut account, create a project, then create a digger, click the "Add configuration" button, and copy and paste the scraper code below into it. Set your Instagram username on line 8, your Instagram password on line 11, and the list of usernames you want to retrieve data for (as a comma-separated list) on line 14. Then save your configuration and run the digger. After some time, you should be able to download the data.
Initial Title
Instagram Business Profile Scraper
Initial Tags
free, web, Business
Initial Language
Other