This actor uses the GPT-2 language model to generate text.
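For context, here is a minimal sketch of how a user might start this actor through the Apify API client for Python and read the generated text from the run's default dataset. The actor ID is a placeholder, and the exact client calls may differ between client versions.

import os
from apify_client import ApifyClient

# Placeholder actor ID and API token - replace with your own values
client = ApifyClient(os.environ['APIFY_TOKEN'])
run = client.actor('<username>/<actor-name>').call(run_input={
    'prompt': 'My name is Apify and I like to',
    'max_length': 30,
})

# The actor pushes the generated text to its default dataset (see main.py below)
for item in client.dataset(run['defaultDatasetId']).iterate_items():
    print(item['generated_text'])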

.dockerignore
# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git
Dockerfile
FROM huggingface/transformers-pytorch-gpu
COPY requirements.txt ./
RUN echo "Python version:" \
&& python3 --version \
&& echo "Pip version:" \
&& pip --version \
&& echo "Installing dependencies from requirements.txt:" \
&& pip install -r requirements.txt \
&& echo "All installed Python packages:" \
&& pip freeze
COPY . ./
# Pre-download the GPT-2 model so its weights are cached inside the image instead of being fetched on every run
RUN python3 -c 'from transformers import pipeline; generator = pipeline("text-generation", model="gpt2")'
CMD python3 main.py
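The last RUN instruction downloads the GPT-2 weights while the image is being built, so actor runs do not have to fetch them from the Hugging Face Hub on every start. A quick local check of that cache is sketched below; it assumes the installed transformers version supports the TRANSFORMERS_OFFLINE environment variable.

import os

# Allow only locally cached model files (assumption: offline mode is supported by the installed transformers version)
os.environ['TRANSFORMERS_OFFLINE'] = '1'

from transformers import pipeline

# Loads from the cache baked into the image; this would raise an error if the model files were missing
generator = pipeline('text-generation', model='gpt2')
print(generator('My name is Apify and I like to', max_length=30, num_return_sequences=1))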
INPUT_SCHEMA.json
{
    "title": "Generate text",
    "description": "This is the input schema of the actor",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "prompt": {
            "title": "Initial prompt",
            "type": "string",
            "description": "The initial prompt used as the basis for text generation",
            "default": "My name is Apify and I like to",
            "editor": "textarea"
        },
        "max_length": {
            "title": "Maximum sequence length",
            "type": "integer",
            "description": "The maximum length of the generated sequence",
            "default": 30,
            "editor": "number"
        }
    },
    "required": [
        "prompt",
        "max_length"
    ]
}
README.md
# GPT-2 text generation
This actor uses the GPT-2 language model to generate text.
For more information about the model, see [https://huggingface.co/gpt2](https://huggingface.co/gpt2).
main.py
import os
from transformers import pipeline
from apify_client import ApifyClient
if __name__ == '__main__':
    # Initialize the Apify client from the environment variables provided by the platform
    client = ApifyClient(os.environ['APIFY_TOKEN'], api_url=os.environ['APIFY_API_BASE_URL'])

    # Read the actor input from the default key-value store
    default_kv_store_client = client.key_value_store(os.environ['APIFY_DEFAULT_KEY_VALUE_STORE_ID'])
    actor_input = default_kv_store_client.get_record(os.environ['APIFY_INPUT_KEY'])['value']

    # Generate one sequence from the prompt with the GPT-2 text-generation pipeline
    generator = pipeline('text-generation', model='gpt2')
    output = generator(actor_input["prompt"], max_length=actor_input["max_length"], num_return_sequences=1)

    # Store the generated text in the actor's default dataset
    default_dataset_client = client.dataset(os.environ['APIFY_DEFAULT_DATASET_ID'])
    default_dataset_client.push_items(output)
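For reference, the text-generation pipeline returns a list with one dictionary per generated sequence, so with num_return_sequences=1 the dataset receives a single item with a generated_text field, shaped roughly like this (the text itself is just a placeholder):

# Illustrative shape of `output` as pushed to the default dataset; the actual text will vary
output = [
    {'generated_text': 'My name is Apify and I like to ...'}
]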
requirements.txt
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/cli/pip_install/#requirements-file-format
# for how to format them
# transformers and torch are not listed here because the huggingface/transformers-pytorch-gpu base image already provides them
apify_client