Carlendar2.0 Datagen
Go to Store
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
Carlendar2.0 Datagen
mashnoon33/coruses-greedy-winter-18
Dockerfile
# This Dockerfile contains the instructions for building a Docker image that
# holds all the code and configuration needed to run your actor.
# For a full Dockerfile reference,
# see https://docs.docker.com/engine/reference/builder/

# First, specify the base Docker image. Apify provides the following
# base images for your convenience:
#  apify/actor-node-basic (Node.js 10 on Alpine Linux, small and fast)
#  apify/actor-node-chrome (Node.js 10 + Chrome on Debian)
#  apify/actor-node-chrome-xvfb (Node.js 10 + Chrome + Xvfb on Debian)
# For more information, see https://docs.apify.com/actor/build#base-images
# Note that you can use any other image from Docker Hub.
FROM apify/actor-node-basic

# Second, copy just package.json since it should be the only file
# that affects NPM install in the next step.
COPY package.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
# "npm list" is wrapped in parentheses together with "|| true" on purpose:
# in the shell, && and || have equal precedence and associate to the left,
# so an unparenthesized "|| true" would also mask a failed "npm install"
# and let the build continue without its dependencies.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick builds will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
# CMD npm start
INPUT_SCHEMA.json
1{
2 "title": "My input schema",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "myField": {
7 "title": "My input field",
8 "type": "string",
9 "nullable": false,
10 "description": "This is a customizable description providing help to the users of your actor.",
11 "editor": "textarea"
12 }
13 }
14}
main.js
// This is the main Node.js source code file of your actor.
// It is referenced from the "scripts" section of the package.json file.
//
// The actor logs its own input, builds a fixed input object for the
// apify/legacy-phantomjs-crawler actor and then metamorphs into that actor.

const Apify = require('apify');

Apify.main(async () => {
    // Get input of the actor. Input fields can be modified in INPUT_SCHEMA.json file.
    // For more information, see https://docs.apify.com/actor/input-schema
    // The input is only logged; it does not influence the crawler configuration below.
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Input for the actor apify/legacy-phantomjs-crawler. It is based on the
    // actor task that was used as the starting point for this actor.
    const metamorphInput = {
        "startUrls": [
            {
                "key": "START",
                "value": "http://example.com"
            }
        ],
        "crawlPurls": [
            {
                "key": "",
                "value": "https://apps.carleton.edu/campus/registrar/schedule/enroll/?term=21FA&subject=[\\D+]"
            }
        ],

        "clickableElementsSelector": null,

        /**
         * Page function handed to the crawler.
         *
         * - On the page labelled "START" it reads the list of URLs to crawl from a
         *   public Google spreadsheet feed, enqueues each of them and produces no
         *   output of its own.
         * - On every other page it returns one record per ".course" element.
         *
         * NOTE(review): this function is presumably serialized to source text and
         * executed inside PhantomJS, not in Node.js - confirm. For that reason it is
         * kept self-contained (no references to the enclosing scope) and uses only
         * ES5 syntax ("var", function expressions, string concatenation).
         *
         * @param {Object} context Crawler-provided page context (jQuery, request,
         *     enqueuePage, skipOutput, willFinishLater, finish).
         * @returns {Array<Object>|undefined} Course records on course pages,
         *     nothing on the START page.
         */
        "pageFunction": function pageFunction(context) {
            var $ = context.jQuery;

            /**
             * Reads the "start - end" time range from one cell of the schedule table.
             *
             * @param {Object} $schedule jQuery set holding the course's ".schedule" element(s).
             * @param {number} row 1-based row index (the output fields use row 1 for
             *     the plain day keys and row 2 for the "*_lab" keys).
             * @param {number} column 1-based column index (1-5 feed the mo..fr keys).
             * @returns {string|null} For example "9:50 am - 11:00 am", or null when the
             *     cell has neither a start nor an end time.
             */
            function timeRange($schedule, row, column) {
                var $cell = $schedule
                    .find("tr:nth-child(" + row + ")")
                    .find("td:nth-child(" + column + ")");
                var start = $cell.find(".start").text();
                var end = $cell.find(".end").text();

                if (start === "" && end === "") {
                    return null;
                }
                // Insert a space before the am/pm suffix, e.g. "9:50am" -> "9:50 am".
                return (start + " - " + end).replace(/am/g, " am").replace(/pm/g, " pm");
            }

            if (context.request.label === "START") {
                var SPREADSHEET_ID = "1nMnWyWLexxv4MZg7YOTS2YJT4c44RFGF4nThhfLelyY";
                var NUMBER_OF_SHEETS = 1;

                // Moves on to the next sheet, or ends the page function after the last one.
                var loadNextOrFinish = function (id) {
                    if (id >= NUMBER_OF_SHEETS) {
                        context.finish();
                    } else {
                        loadData(id + 1);
                    }
                };

                // Fetches sheet number `id` and enqueues the URL stored in every row title.
                // NOTE(review): the "feeds/list" endpoint looks like the legacy Google
                // Sheets v3 feed - verify that it is still being served.
                var loadData = function (id) {
                    var urlAPI = "https://spreadsheets.google.com/feeds/list/" + SPREADSHEET_ID + "/" + id + "/public/values?alt=json";
                    $.get(urlAPI, function (data) {
                        // An empty sheet may come back without an "entry" list.
                        var entries = (data && data.feed && data.feed.entry) || [];
                        $.each(entries, function (index, value) {
                            context.enqueuePage(value.title.$t);
                        });
                        loadNextOrFinish(id);
                    }).fail(function () {
                        // willFinishLater() was called, so finish() must eventually be
                        // called even when the request fails; otherwise the page
                        // function would never complete.
                        loadNextOrFinish(id);
                    });
                };

                loadData(1);
                context.skipOutput();
                context.willFinishLater();
            } else {
                var results = [];

                $(".course").each(function () {
                    var $course = $(this);
                    var $schedule = $course.find(".data").find(".schedule");
                    var courseNum = $course.find(".coursenum").text();
                    var credits = $course.find(".credits").text();

                    results.push({
                        value: results.length,
                        id: courseNum,
                        key: courseNum,
                        // The title text also contains the course number and the
                        // credits; strip both to keep only the course name.
                        text: $course.find(".title").text().replace(courseNum, "").replace(credits, "").trim(),
                        credits: credits,
                        location: $schedule.find(".locations").text(),
                        instructor: $course.find(".faculty").text().trim(),
                        mo: timeRange($schedule, 1, 1),
                        tu: timeRange($schedule, 1, 2),
                        we: timeRange($schedule, 1, 3),
                        th: timeRange($schedule, 1, 4),
                        fr: timeRange($schedule, 1, 5),
                        mo_lab: timeRange($schedule, 2, 1),
                        tu_lab: timeRange($schedule, 2, 2),
                        we_lab: timeRange($schedule, 2, 3),
                        th_lab: timeRange($schedule, 2, 4),
                        fr_lab: timeRange($schedule, 2, 5)
                    });
                });

                return results;
            }
        },
        "interceptRequest": function interceptRequest(context, newRequest) {
            // called whenever the crawler finds a link to a new page,
            // use it to override default behavior
            return newRequest;
        },
        "considerUrlFragment": false,
        "loadImages": true,
        "loadCss": true,
        "injectJQuery": true,
        "injectUnderscoreJs": false,
        "ignoreRobotsTxt": false,
        "skipLoadingFrames": false,
        "verboseLog": true,
        "disableWebSecurity": false,
        "rotateUserAgents": false,
        "maxCrawledPages": null,
        "maxOutputPages": null,
        "maxCrawlDepth": null,
        "resourceTimeout": null,
        "pageLoadTimeout": null,
        "pageFunctionTimeout": null,
        "maxInfiniteScrollHeight": null,
        "randomWaitBetweenRequests": null,
        "maxCrawledPagesPerSlave": null,
        "customHttpHeaders": null,
        "customData": null,
        "cookies": null,
        "cookiesPersistence": "PER_PROCESS",
        "finishWebhookUrl": null,
        "finishWebhookData": null,
        "maxParallelRequests": 1,
        "proxyConfiguration": {
            "useApifyProxy": false
        }
    };

    // Now let's metamorph into actor apify/legacy-phantomjs-crawler using the created input.
    await Apify.metamorph('apify/legacy-phantomjs-crawler', metamorphInput);
});
package.json
1{
2 "name": "my-actor",
3 "version": "0.0.1",
4 "dependencies": {
5 "apify": "^0.17.0"
6 },
7 "scripts": {
8 "start": "node main.js"
9 },
10 "author": "Me!"
11}
Developer
Maintained by Community
Categories