Carlendar2.0 Datagen avatar

Carlendar2.0 Datagen

Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Carlendar2.0 Datagen

Carlendar2.0 Datagen

mashnoon33/coruses-greedy-winter-18

Dockerfile

# Dockerfile contains instructions on how to build a Docker image that
# will contain all the code and configuration needed to run your actor.
# For a full Dockerfile reference,
# see https://docs.docker.com/engine/reference/builder/

# First, specify the base Docker image. Apify provides the following
# base images for your convenience:
#  apify/actor-node-basic (Node.js 10 on Alpine Linux, small and fast)
#  apify/actor-node-chrome (Node.js 10 + Chrome on Debian)
#  apify/actor-node-chrome-xvfb (Node.js 10 + Chrome + Xvfb on Debian)
# For more information, see https://docs.apify.com/actor/build#base-images
# Note that you can use any other image from Docker Hub.
FROM apify/actor-node-basic

# Second, copy just package.json since it should be the only file
# that affects NPM install in the next step
COPY package.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
# "npm list" is grouped with its "|| true" in a subshell on purpose: in the
# shell, && and || have equal precedence and associate left to right, so an
# ungrouped "|| true" would also swallow a failing "npm install" and let the
# build succeed with missing dependencies. Only "npm list" is best-effort.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
# CMD npm start

INPUT_SCHEMA.json

1{
2    "title": "My input schema",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "myField": {
7            "title": "My input field",
8            "type": "string",
9            "nullable": false,
10            "description": "This is a customizable description providing help to the users of your actor.",
11            "editor": "textarea"
12        }
13    }
14}

main.js

// This is the main Node.js source code file of your actor.
// It is referenced from the "scripts" section of the package.json file.

const Apify = require('apify');

/**
 * Page function passed to apify/legacy-phantomjs-crawler as part of the
 * metamorph input.
 *
 * - For the request labelled "START" it downloads the public Google
 *   Spreadsheet feed(s), enqueues every URL found in the entries' titles,
 *   produces no output for the page and finishes asynchronously.
 * - For every other page it returns one record per `.course` element with
 *   the course number, title, credits, location, instructor and the
 *   "start - end" time of each weekday cell in the first schedule row
 *   (mo..fr) and in the second schedule row (mo_lab..fr_lab).
 *
 * NOTE(review): this function is expected to run inside the crawler's
 * PhantomJS page context rather than in this Node.js process, so it sticks
 * to ES5 syntax (var, function expressions) — confirm before modernizing.
 *
 * @param {Object} context Crawler-provided context (jQuery, request,
 *     enqueuePage, skipOutput, willFinishLater, finish).
 * @returns {Array<Object>|undefined} Course records, or undefined for the
 *     START page.
 */
function pageFunction(context) {
    var $ = context.jQuery;

    if (context.request.label === "START") {
        var SPREADSHEET_ID = "1nMnWyWLexxv4MZg7YOTS2YJT4c44RFGF4nThhfLelyY";
        var NUMBER_OF_SHEETS = 1;

        // Moves on to the next sheet, or finishes the page after the last one.
        var advance = function (id) {
            if (id >= NUMBER_OF_SHEETS) {
                context.finish();
            } else {
                loadData(id + 1);
            }
        };

        // Loads sheet number `id` and enqueues the URL stored in each entry's title.
        var loadData = function (id) {
            var urlAPI = "https://spreadsheets.google.com/feeds/list/" + SPREADSHEET_ID + "/" + id + "/public/values?alt=json";
            $.get(urlAPI, function (data) {
                // Fall back to an empty list when the feed carries no entries,
                // so the callback cannot throw before finish() is reached.
                var entries = (data && data.feed && data.feed.entry) || [];
                $.each(entries, function (index, value) {
                    context.enqueuePage(value.title.$t);
                });
                advance(id);
            }).fail(function () {
                // Because willFinishLater() was called, the page only ends when
                // finish() is invoked; without this handler a failed request
                // would leave the START page hanging.
                advance(id);
            });
        };

        loadData(1);
        context.skipOutput();
        context.willFinishLater();
        return;
    }

    var DAY_KEYS = ["mo", "tu", "we", "th", "fr"];

    // Returns the "start - end" text of the schedule cell at the given
    // 1-based row/column, with a space inserted before "am"/"pm", or null
    // when the cell has neither a start nor an end time.
    var readSlot = function ($schedule, row, column) {
        var $cell = $schedule
            .find("tr:nth-child(" + row + ")")
            .find("td:nth-child(" + column + ")");
        var start = $cell.find(".start").text();
        var end = $cell.find(".end").text();
        if (start === "" && end === "") {
            return null;
        }
        return (start + " - " + end).replace(/am/g, " am").replace(/pm/g, " pm");
    };

    var results = [];
    $(".course").each(function () {
        var $course = $(this);
        var $schedule = $course.find(".data").find(".schedule");
        var courseNumber = $course.find(".coursenum").text();
        var credits = $course.find(".credits").text();

        var course = {
            value: results.length,
            id: courseNumber,
            key: courseNumber,
            // The title element also contains the course number and the
            // credits, so both are stripped to leave only the course name.
            text: $course.find(".title").text().replace(courseNumber, "").replace(credits, "").trim(),
            credits: credits,
            location: $schedule.find(".locations").text(),
            instructor: $course.find(".faculty").text().trim()
        };

        var i;
        // First schedule row: mo, tu, we, th, fr.
        for (i = 0; i < DAY_KEYS.length; i++) {
            course[DAY_KEYS[i]] = readSlot($schedule, 1, i + 1);
        }
        // Second schedule row: mo_lab, tu_lab, we_lab, th_lab, fr_lab.
        for (i = 0; i < DAY_KEYS.length; i++) {
            course[DAY_KEYS[i] + "_lab"] = readSlot($schedule, 2, i + 1);
        }

        results.push(course);
    });
    return results;
}

/**
 * Called whenever the crawler finds a link to a new page; use it to
 * override the default behavior. Currently passes the request through.
 *
 * @param {Object} context Crawler-provided context.
 * @param {Object} newRequest The request about to be enqueued.
 * @returns {Object} The unchanged request.
 */
function interceptRequest(context, newRequest) {
    return newRequest;
}

Apify.main(async () => {
    // Get input of the actor. Input fields can be modified in INPUT_SCHEMA.json file.
    // For more information, see https://docs.apify.com/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Here you can prepare your input for actor apify/legacy-phantomjs-crawler.
    // This input is based on the actor task you used as the starting point.
    const metamorphInput = {
        "startUrls": [
            {
                "key": "START",
                "value": "http://example.com"
            }
        ],
        "crawlPurls": [
            {
                "key": "",
                "value": "https://apps.carleton.edu/campus/registrar/schedule/enroll/?term=21FA&subject=[\\D+]"
            }
        ],

        "clickableElementsSelector": null,
        "pageFunction": pageFunction,
        "interceptRequest": interceptRequest,
        "considerUrlFragment": false,
        "loadImages": true,
        "loadCss": true,
        "injectJQuery": true,
        "injectUnderscoreJs": false,
        "ignoreRobotsTxt": false,
        "skipLoadingFrames": false,
        "verboseLog": true,
        "disableWebSecurity": false,
        "rotateUserAgents": false,
        "maxCrawledPages": null,
        "maxOutputPages": null,
        "maxCrawlDepth": null,
        "resourceTimeout": null,
        "pageLoadTimeout": null,
        "pageFunctionTimeout": null,
        "maxInfiniteScrollHeight": null,
        "randomWaitBetweenRequests": null,
        "maxCrawledPagesPerSlave": null,
        "customHttpHeaders": null,
        "customData": null,
        "cookies": null,
        "cookiesPersistence": "PER_PROCESS",
        "finishWebhookUrl": null,
        "finishWebhookData": null,
        "maxParallelRequests": 1,
        "proxyConfiguration": {
            "useApifyProxy": false
        }
    };

    // Now let's metamorph into actor apify/legacy-phantomjs-crawler using the created input.
    await Apify.metamorph('apify/legacy-phantomjs-crawler', metamorphInput);
});

package.json

1{
2    "name": "my-actor",
3    "version": "0.0.1",
4    "dependencies": {
5        "apify": "^0.17.0"
6    },
7    "scripts": {
8        "start": "node main.js"
9    },
10    "author": "Me!"
11}
Developer
Maintained by Community
Categories