# Base image: Apify's Python 3.12 + Playwright Actor image.
# Other Apify images are listed at https://hub.docker.com/r/apify/;
# any other image from Docker Hub can be used instead.
FROM apify/actor-python-playwright:3.12

# Copy only requirements.txt first. It is the sole input of the dependency
# install below, so that layer stays cached until the requirements change.
COPY requirements.txt ./

# Install the packages listed in requirements.txt and print the Python version,
# the pip version and every installed package (with versions) for debugging.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install --no-cache-dir -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Copy the remaining files and directories with the source code.
# Because this happens after the dependency install, rebuilds triggered by
# source-only changes reuse the cached layers above.
COPY . ./

# Byte-compile the sources so that syntax errors fail the build
# instead of surfacing when the Actor starts.
RUN python3 -m compileall -q .

# Launch the Actor by running the "src" package as a module.
CMD ["python3", "-m", "src"]

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "investing_fx_historic_list",
    "title": "Investing.com FX Historical Data",
    "description": "Scrapes the historical price table of a currency pair from Investing.com.",
    "version": "0.0",
    "buildTag": "latest",
    "meta": {
        "templateId": "python-crawlee-playwright"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

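{
    "title": "Investing FX Historical List Scraper",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "currency1": {
            "title": "Currency 1",
            "type": "string",
            "description": "Code of the first currency in the pair, e.g. usd. It is placed first in the Investing.com URL (currency1-currency2-historical-data).",
            "default": "usd",
            "editor" : "textfield"
        },
        "currency2": {
            "title": "Currency 2",
            "type": "string",
            "description": "Code of the second currency in the pair, e.g. eur. It is placed second in the Investing.com URL (currency1-currency2-historical-data).",
            "default": "eur",
            "editor" : "textfield"
        }
    }
}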

src/__main__.py

"""Package entry point: running the package as a module executes this file."""

import asyncio

from .main import main

# Drive the Actor's `main` coroutine to completion.
asyncio.run(main())

src/main.py

1"""This module defines the main entry point for the Apify Actor.
2
3Feel free to modify this file to suit your specific needs.
4
5To build Apify Actors, utilize the Apify SDK toolkit, read more at the official documentation:
6https://docs.apify.com/sdk/python
7"""
8
9from apify import Actor, Request
10from crawlee.playwright_crawler import PlaywrightCrawler, PlaywrightCrawlingContext
11from crawlee.proxy_configuration import ProxyConfiguration
12import asyncio
13from datetime import timedelta
14
15
def _parse_history_rows(table_text: str) -> list[dict[str, str]]:
    """Turn the text of the history table body into a list of row dicts.

    Each line of `table_text` is treated as one table row whose cells are
    separated by tabs. Cells 0-4 and 6 are exported as date, price, open, high,
    low and change. Cell 5 is not exported (presumably the volume column --
    TODO confirm against the page). Lines with fewer than seven cells are
    skipped instead of raising IndexError.
    """
    rows: list[dict[str, str]] = []
    for line in table_text.strip().split('\n'):
        cells = line.strip().split('\t')
        if len(cells) < 7:
            continue
        rows.append({
            'date': cells[0],
            'price': cells[1],
            'open': cells[2],
            'high': cells[3],
            'low': cells[4],
            'change': cells[6],
        })
    return rows


async def main() -> None:
    """Main entry point for the Apify Actor.

    Reads `currency1` and `currency2` from the Actor input, builds the
    Investing.com historical-data URL for that pair, opens it with a
    Playwright (Firefox) crawler behind Apify Proxy and pushes one dataset
    item per table row.

    This coroutine is executed using `asyncio.run()`, so it must remain an
    asynchronous function.
    """
    async with Actor:
        # Retrieve the Actor input; a missing input is treated as an empty dict.
        actor_input = await Actor.get_input() or {}
        currency1 = str(actor_input.get('currency1') or '').strip().lower()
        currency2 = str(actor_input.get('currency2') or '').strip().lower()

        # Validate before building the URL: concatenating a missing value
        # would raise TypeError before this check could run.
        if not currency1 or not currency2:
            Actor.log.info('No currency Input specified in actor input, exiting...')
            await Actor.exit()
            return  # Reached only if Actor.exit() does not terminate the process.

        start_urls = [f'https://investing.com/currencies/{currency1}-{currency2}-historical-data']

        # Use Apify Proxy with the same proxy group as before, but let the SDK
        # supply the password instead of embedding it in the source.
        # NOTE(review): the password that used to be hard-coded here was exposed
        # in source control and should be rotated.
        proxy_configuration = await Actor.create_proxy_configuration(
            groups=['BUYPROXIES94952'],
        )

        # Create a crawler. Supported browser types: chromium, firefox, webkit.
        crawler = PlaywrightCrawler(
            # Limit the crawl to max requests. Remove or increase it for crawling all links.
            max_requests_per_crawl=10,
            headless=False,
            browser_type='firefox',
            proxy_configuration=proxy_configuration,
            # request_handler_timeout=timedelta(seconds=90),
        )

        # Define a request handler, which will be called for every request.
        @crawler.router.default_handler
        async def request_handler(context: PlaywrightCrawlingContext) -> None:
            url = context.request.url
            Actor.log.info(f'Scraping {url}...')
            # Fixed wait before reading the table (presumably to let the page
            # finish rendering -- TODO confirm whether a selector wait suffices).
            await asyncio.sleep(20)

            # Extract the table body as text and store one item per row.
            table_text = await context.page.locator("div table.freeze-column-w-1 tbody").inner_text()
            rows = _parse_history_rows(table_text)
            if not rows:
                Actor.log.warning(f'No complete table rows found on {url}.')
            for row in rows:
                await context.push_data(row)

        # Run the crawler with the starting requests.
        await crawler.run(start_urls)

.dockerignore

.git
.mise.toml
.nvim.lua
storage

# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
.python-version

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

.gitignore

.mise.toml
.nvim.lua
storage

# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
.python-version

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

requirements.txt

1# Feel free to add your Python dependencies below. For formatting guidelines, see:
2# https://pip.pypa.io/en/latest/reference/requirements-file-format/
3
4apify == 2.0.0
5crawlee[playwright]

Stock Dividends (investing.com)

pintostudio/stock-dividends-investing-com

The Stock Dividends Data Actor retrieves comprehensive dividend information for publicly traded stocks from Investing.com.

Pinto Studio

Crypto Recent Data

pintostudio/crypto-recent-data

The Crypto Recent Data (Investing.com) Actor is a powerful web scraping tool that fetches recent historical cryptocurrency data from Investing.com.

Pinto Studio

Stock Earnings

pintostudio/stock-earnings

The Stock Earnings (investing.com) actor is a specialized web scraper designed to fetch comprehensive earnings data for publicly traded stocks from Investing.com.

Pinto Studio

Stock Historical Data (investing.com)

pintostudio/stock-historical-data-investing-com

This Apify actor scrapes historical stock data from Investing.com. Ideal for traders, analysts, researchers, and data scientists who need daily, weekly, or monthly stock price history.

Pinto Studio

Invest Crypto Historical

pintostudio/invest-crypto-historical

The Cryptocurrency Historical Data (Investing.com) Actor is a powerful tool that fetches historical cryptocurrency price data from Investing.com.

Pinto Studio

Stock Consensus Estimates (investing.com)

pintostudio/stock-consensus-estimates-investing-com

The Stock Consensus Estimates Data Actor fetches comprehensive consensus estimates and analyst price targets for stocks from Investing.com.

Pinto Studio

Stock Financials (investing.com)

pintostudio/stock-financials-investing-com

The Stock Financials Summary Data (Investing.com) Actor is a powerful tool that fetches comprehensive financial summary data for publicly traded companies from various global stock exchanges.

Pinto Studio

Cryptocurrency Overview (investing.com)

pintostudio/invest-crypto-Overview

This Actor is a powerful web scraping tool that fetches comprehensive cryptocurrency market data from Investing.com. It provides real-time information about the top cryptocurrencies, including prices, market capitalization, trading volumes, and percentage changes.

Pinto Studio

Crypto Info (investing.com)

pintostudio/crypto-info-investing-com

The Crypto Information Data (Investing.com) Actor is a powerful tool designed to fetch comprehensive cryptocurrency information from Investing.com.

Pinto Studio

Stock Earnings Transcript

pintostudio/stock-earnings-transcript

The Stock Earnings Transcript Data Actor is a powerful tool that fetches comprehensive earnings transcript data and fundamental financial information from Investing.com for specified stock symbols.

Pinto Studio

Stock Recent Data

pintostudio/stock-recent-data

The Stock Recent Data (Investing.com) actor is a powerful tool designed to fetch recent stock market data from Investing.com for any specified stock symbol across multiple countries.

Pinto Studio

Investing FX Historical Data

Investing FX Historical Data

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/__main__.py

src/main.py

.dockerignore

.gitignore

requirements.txt

You might also like

Stock Dividends (investing.com)

Crypto Recent Data

Stock Earnings

Stock Historical Data (investing.com)

Invest Crypto Historical

Stock Consensus Estimates (investing.com)

Stock Financials (investing.com)

Cryptocurrency Overview (investing.com)

Crypto Info (investing.com)

Stock Earnings Transcript

Stock Recent Data

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/__main__.py

src/main.py

.dockerignore

.gitignore

requirements.txt

src/main.py

src/main.py