Text Field Name (BYOD) — byod_text_field: string (Optional)
Default value of this property is "text"
max_byod_documents: integer (Optional)
Default value of this property is 500
crawl_mode: Enum (Optional)
Allowed values (strings): "single_page", "same_domain", "same_subdomain", "all_links". Default value of this property is "same_domain"
Maximum Pages — max_pages: integer (Optional)
Default value of this property is 100
max_depth: integer (Optional)
Default value of this property is 3
Content Selectors — content_selectors: array (Optional)
exclude_selectors: array (Optional)
min_word_count: integer (Optional)
Default value of this property is 100
max_word_count: integer (Optional)
Default value of this property is 50000
deduplicate: boolean (Optional)
Default value of this property is true
dedup_threshold_percent: integer (Optional)
Default value of this property is 85
quality_filter: boolean (Optional)
Default value of this property is true
min_quality_score_percent: integer (Optional)
Default value of this property is 50
language_filter: array (Optional)
chunk_documents: boolean (Optional)
Default value of this property is false
chunk_size: integer (Optional)
Default value of this property is 512
chunk_overlap: integer (Optional)
Default value of this property is 64
output_format: Enum (Optional)
Allowed values (strings): "jsonl", "json", "csv". Default value of this property is "jsonl"
Text Field Name — text_field_name: string (Optional)
Default value of this property is "text"
include_metadata: boolean (Optional)
Default value of this property is true
include_raw_html: boolean (Optional)
Default value of this property is false
remove_urls: boolean (Optional)
Default value of this property is false
remove_emails: boolean (Optional)
Default value of this property is true
normalize_whitespace: boolean (Optional)
Default value of this property is true
use_proxies: boolean (Optional)
Default value of this property is false
max_concurrency: integer (Optional)
Default value of this property is 10
request_delay_ms: integer (Optional)
Default value of this property is 500