From 44f57f495b8ca652c10dfce0b1f25aa092fb5c44 Mon Sep 17 00:00:00 2001 From: canonnizq Date: Sun, 27 Oct 2024 22:11:51 +0800 Subject: [PATCH 01/15] create namespace --- lib/routes/aimeta/namespace.ts | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 lib/routes/aimeta/namespace.ts diff --git a/lib/routes/aimeta/namespace.ts b/lib/routes/aimeta/namespace.ts new file mode 100644 index 00000000000000..7a5f4165f1abbf --- /dev/null +++ b/lib/routes/aimeta/namespace.ts @@ -0,0 +1,6 @@ +import type { Namespace } from '@/types'; + +export const namespace: Namespace = { + name: 'AI at Meta', + url: 'ai.meta.com', +}; \ No newline at end of file From dd6092e0f41a44c3a5b292fa306612047b6a669e Mon Sep 17 00:00:00 2001 From: canonnizq Date: Sun, 27 Oct 2024 22:18:50 +0800 Subject: [PATCH 02/15] create blog.ts --- lib/routes/aimeta/blog.ts | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 lib/routes/aimeta/blog.ts diff --git a/lib/routes/aimeta/blog.ts b/lib/routes/aimeta/blog.ts new file mode 100644 index 00000000000000..90426642d21804 --- /dev/null +++ b/lib/routes/aimeta/blog.ts @@ -0,0 +1,34 @@ +import { Route } from '@/types'; +import ofetch from '@/utils/ofetch'; +import { load } from 'cheerio'; +import { parseDate } from '@/utils/parse-date'; + +export const route: Route = { + path: '/blog', + categories: ['blog', 'programming'], + example: '/meta/blog', + radar: [{ source: ['ai.meta.com/blog'] }], + name: 'Blog', + maintainers: ['gavrilov'], + handler +} + +async function handler() { + const baseUrl = 'https://ai.meta.com'; + + const response = await ofetch(`${baseUrl}/blog`); + const $ = load(response); + + const items = $('div._amsu') + .toArray().map((item) => ({ + title: $(item).children('p._amt2').first().text(), + link: $(item).children('a._amt1').first().attr('href'), + description: $(item).children('p._amt3').first().text(), + pubDate: parseDate($(item).children('div._amt4').first().text()) + })) + return { + title: 'AI at Meta Blog', + link: 'https://ai.meta.com/blog', + item: items + } +} \ No newline at end of file From b7dc08ecfbff9210cb4f1542cc7d917603a1ec05 Mon Sep 17 00:00:00 2001 From: canonnizq Date: Sun, 27 Oct 2024 22:22:49 +0800 Subject: [PATCH 03/15] category finding --- lib/routes/aimeta/blog.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/routes/aimeta/blog.ts b/lib/routes/aimeta/blog.ts index 90426642d21804..b123afd2046fc2 100644 --- a/lib/routes/aimeta/blog.ts +++ b/lib/routes/aimeta/blog.ts @@ -21,8 +21,9 @@ async function handler() { const items = $('div._amsu') .toArray().map((item) => ({ - title: $(item).children('p._amt2').first().text(), + category: $(item).children('p._amt0').first().text(), link: $(item).children('a._amt1').first().attr('href'), + title: $(item).children('p._amt2').first().text(), description: $(item).children('p._amt3').first().text(), pubDate: parseDate($(item).children('div._amt4').first().text()) })) From 3e16e6374f37e3a263cb219fa5e2f03ed7ce9af2 Mon Sep 17 00:00:00 2001 From: canonnizq Date: Sun, 27 Oct 2024 22:26:58 +0800 Subject: [PATCH 04/15] fix typos --- lib/routes/aimeta/blog.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/routes/aimeta/blog.ts b/lib/routes/aimeta/blog.ts index b123afd2046fc2..97754f7bbdb819 100644 --- a/lib/routes/aimeta/blog.ts +++ b/lib/routes/aimeta/blog.ts @@ -6,10 +6,10 @@ import { parseDate } from '@/utils/parse-date'; export const route: Route = { path: '/blog', categories: ['blog', 'programming'], - example: '/meta/blog', + example: '/aimeta/blog', radar: [{ source: ['ai.meta.com/blog'] }], name: 'Blog', - maintainers: ['gavrilov'], + maintainers: ['canonnizq'], handler } @@ -27,6 +27,7 @@ async function handler() { description: $(item).children('p._amt3').first().text(), pubDate: parseDate($(item).children('div._amt4').first().text()) })) + return { title: 'AI at Meta Blog', link: 'https://ai.meta.com/blog', From 686f38a4beaca67ceeb9774be2864e0f4f622d9d Mon Sep 17 00:00:00 2001 From: canonnizq Date: Sun, 27 Oct 2024 22:29:43 +0800 Subject: [PATCH 05/15] fix linter errors --- lib/routes/aimeta/blog.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/routes/aimeta/blog.ts b/lib/routes/aimeta/blog.ts index 97754f7bbdb819..13e1d21a66015a 100644 --- a/lib/routes/aimeta/blog.ts +++ b/lib/routes/aimeta/blog.ts @@ -11,7 +11,7 @@ export const route: Route = { name: 'Blog', maintainers: ['canonnizq'], handler -} +}; async function handler() { const baseUrl = 'https://ai.meta.com'; @@ -26,11 +26,11 @@ async function handler() { title: $(item).children('p._amt2').first().text(), description: $(item).children('p._amt3').first().text(), pubDate: parseDate($(item).children('div._amt4').first().text()) - })) + })); return { title: 'AI at Meta Blog', link: 'https://ai.meta.com/blog', item: items - } + }; } \ No newline at end of file From 8d89cca72914d5d1052f1436cf4b43fe4cd6088c Mon Sep 17 00:00:00 2001 From: canonnizq Date: Sun, 27 Oct 2024 22:46:44 +0800 Subject: [PATCH 06/15] fix searching --- lib/routes/aimeta/blog.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/routes/aimeta/blog.ts b/lib/routes/aimeta/blog.ts index 13e1d21a66015a..b6930f7f9525e4 100644 --- a/lib/routes/aimeta/blog.ts +++ b/lib/routes/aimeta/blog.ts @@ -19,13 +19,13 @@ async function handler() { const response = await ofetch(`${baseUrl}/blog`); const $ = load(response); - const items = $('div._amsu') + const items = $('div._ams_') .toArray().map((item) => ({ - category: $(item).children('p._amt0').first().text(), - link: $(item).children('a._amt1').first().attr('href'), - title: $(item).children('p._amt2').first().text(), - description: $(item).children('p._amt3').first().text(), - pubDate: parseDate($(item).children('div._amt4').first().text()) + category: $(item).children('p._amt0').text(), + link: $(item).children('a._amt1').attr('href'), + title: $(item).children('a._amt1').children('p._amt2').text(), + description: $(item).children('p._amt3').children('p._amt3').text(), + pubDate: parseDate($(item).children('p._amt4').text()) })); return { From 676ccba7871908cadbd196d368afc2318d5cba32 Mon Sep 17 00:00:00 2001 From: canonnizq Date: Sun, 27 Oct 2024 23:02:00 +0800 Subject: [PATCH 07/15] correct class name --- lib/routes/aimeta/blog.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/aimeta/blog.ts b/lib/routes/aimeta/blog.ts index b6930f7f9525e4..8274d1034466ad 100644 --- a/lib/routes/aimeta/blog.ts +++ b/lib/routes/aimeta/blog.ts @@ -24,7 +24,7 @@ async function handler() { category: $(item).children('p._amt0').text(), link: $(item).children('a._amt1').attr('href'), title: $(item).children('a._amt1').children('p._amt2').text(), - description: $(item).children('p._amt3').children('p._amt3').text(), + description: $(item).children('p._4ik4._4ik5').children('p._amt3').text(), pubDate: parseDate($(item).children('p._amt4').text()) })); From 0c3c75b67a5cf797e2144a78bcb3e571b1745502 Mon Sep 17 00:00:00 2001 From: canonnizq Date: Mon, 28 Oct 2024 09:14:48 +0800 Subject: [PATCH 08/15] (hopefully) fix search --- lib/routes/aimeta/blog.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/routes/aimeta/blog.ts b/lib/routes/aimeta/blog.ts index 8274d1034466ad..2bdc3f99da5a5f 100644 --- a/lib/routes/aimeta/blog.ts +++ b/lib/routes/aimeta/blog.ts @@ -16,16 +16,16 @@ export const route: Route = { async function handler() { const baseUrl = 'https://ai.meta.com'; - const response = await ofetch(`${baseUrl}/blog`); + const response = await ofetch(`${baseUrl}/blog/`); const $ = load(response); const items = $('div._ams_') .toArray().map((item) => ({ - category: $(item).children('p._amt0').text(), - link: $(item).children('a._amt1').attr('href'), - title: $(item).children('a._amt1').children('p._amt2').text(), - description: $(item).children('p._4ik4._4ik5').children('p._amt3').text(), - pubDate: parseDate($(item).children('p._amt4').text()) + category: $(item).find('p._amt0').text(), + link: $(item).find('a._amt1').attr('href'), + title: $(item).find('a._amt1 p._amt2').text(), + description: $(item).find('div._4ik4._4ik5 p._amt3').text(), + pubDate: parseDate($(item).find('p._amt4').text()) })); return { From 9ec7d22c07b1078f00919c54466ace24de420138 Mon Sep 17 00:00:00 2001 From: CanonNi Date: Fri, 1 Nov 2024 18:57:14 +0800 Subject: [PATCH 09/15] Update lib/routes/aimeta/blog.ts Co-authored-by: Tony --- lib/routes/aimeta/blog.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/aimeta/blog.ts b/lib/routes/aimeta/blog.ts index 2bdc3f99da5a5f..0f692a2e35fa5c 100644 --- a/lib/routes/aimeta/blog.ts +++ b/lib/routes/aimeta/blog.ts @@ -5,7 +5,7 @@ import { parseDate } from '@/utils/parse-date'; export const route: Route = { path: '/blog', - categories: ['blog', 'programming'], + categories: ['blog'], example: '/aimeta/blog', radar: [{ source: ['ai.meta.com/blog'] }], name: 'Blog', From aa4403eee7d8ddf16bfea1513a8e50f45ff864eb Mon Sep 17 00:00:00 2001 From: canonnizq Date: Fri, 1 Nov 2024 19:01:01 +0800 Subject: [PATCH 10/15] per comments on pull request --- lib/routes/{aimeta => meta}/blog.ts | 6 +++--- lib/routes/{aimeta => meta}/namespace.ts | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename lib/routes/{aimeta => meta}/blog.ts (88%) rename lib/routes/{aimeta => meta}/namespace.ts (100%) diff --git a/lib/routes/aimeta/blog.ts b/lib/routes/meta/blog.ts similarity index 88% rename from lib/routes/aimeta/blog.ts rename to lib/routes/meta/blog.ts index 2bdc3f99da5a5f..7ad8c25f3309f5 100644 --- a/lib/routes/aimeta/blog.ts +++ b/lib/routes/meta/blog.ts @@ -7,7 +7,7 @@ export const route: Route = { path: '/blog', categories: ['blog', 'programming'], example: '/aimeta/blog', - radar: [{ source: ['ai.meta.com/blog'] }], + radar: [{ source: ['ai.meta.com/blog/'] }], name: 'Blog', maintainers: ['canonnizq'], handler @@ -17,7 +17,7 @@ async function handler() { const baseUrl = 'https://ai.meta.com'; const response = await ofetch(`${baseUrl}/blog/`); - const $ = load(response); + const $ = load(response); const items = $('div._ams_') .toArray().map((item) => ({ @@ -30,7 +30,7 @@ async function handler() { return { title: 'AI at Meta Blog', - link: 'https://ai.meta.com/blog', + link: 'https://ai.meta.com/blog/', item: items }; } \ No newline at end of file diff --git a/lib/routes/aimeta/namespace.ts b/lib/routes/meta/namespace.ts similarity index 100% rename from lib/routes/aimeta/namespace.ts rename to lib/routes/meta/namespace.ts From f72267dc91898d7a7c49e45bca0efea6b4f60752 Mon Sep 17 00:00:00 2001 From: canonnizq Date: Fri, 1 Nov 2024 19:01:26 +0800 Subject: [PATCH 11/15] fix path --- lib/routes/meta/blog.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/meta/blog.ts b/lib/routes/meta/blog.ts index 7ad8c25f3309f5..93049378b98a15 100644 --- a/lib/routes/meta/blog.ts +++ b/lib/routes/meta/blog.ts @@ -6,7 +6,7 @@ import { parseDate } from '@/utils/parse-date'; export const route: Route = { path: '/blog', categories: ['blog', 'programming'], - example: '/aimeta/blog', + example: '/meta/blog', radar: [{ source: ['ai.meta.com/blog/'] }], name: 'Blog', maintainers: ['canonnizq'], From 73cacb1aaf46e0ad580dacac73eca2e414e6bd74 Mon Sep 17 00:00:00 2001 From: canonnizq Date: Fri, 1 Nov 2024 19:43:31 +0800 Subject: [PATCH 12/15] rename from aimeta to meta --- lib/routes/meta/namespace.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/meta/namespace.ts b/lib/routes/meta/namespace.ts index 7a5f4165f1abbf..a546feedebaf2a 100644 --- a/lib/routes/meta/namespace.ts +++ b/lib/routes/meta/namespace.ts @@ -2,5 +2,5 @@ import type { Namespace } from '@/types'; export const namespace: Namespace = { name: 'AI at Meta', - url: 'ai.meta.com', + url: 'meta.com', }; \ No newline at end of file From 396f88529fdcc7ba683f393e0b111fe58817c388 Mon Sep 17 00:00:00 2001 From: canonnizq Date: Fri, 1 Nov 2024 20:57:01 +0800 Subject: [PATCH 13/15] fix linter errors --- lib/routes/meta/blog.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/routes/meta/blog.ts b/lib/routes/meta/blog.ts index 662e897c25815a..9e8c9d87268960 100644 --- a/lib/routes/meta/blog.ts +++ b/lib/routes/meta/blog.ts @@ -17,7 +17,7 @@ async function handler() { const baseUrl = 'https://ai.meta.com'; const response = await ofetch(`${baseUrl}/blog/`); - const $ = load(response); + const $ = load(response); const items = $('div._ams_') .toArray().map((item) => ({ @@ -26,7 +26,7 @@ async function handler() { title: $(item).find('a._amt1 p._amt2').text(), description: $(item).find('div._4ik4._4ik5 p._amt3').text(), pubDate: parseDate($(item).find('p._amt4').text()) - })); + })); return { title: 'AI at Meta Blog', From 819ffea4aa15844dc8910fc2869957314e90bf3d Mon Sep 17 00:00:00 2001 From: canonnizq Date: Fri, 1 Nov 2024 21:00:00 +0800 Subject: [PATCH 14/15] testing if puppeteer works --- lib/routes/meta/blog.ts | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/routes/meta/blog.ts b/lib/routes/meta/blog.ts index 9e8c9d87268960..439de9902bbe97 100644 --- a/lib/routes/meta/blog.ts +++ b/lib/routes/meta/blog.ts @@ -1,7 +1,8 @@ import { Route } from '@/types'; -import ofetch from '@/utils/ofetch'; -import { load } from 'cheerio'; import { parseDate } from '@/utils/parse-date'; +import { load } from 'cheerio'; +import logger from '@/utils/logger'; +import puppeteer from '@/utils/puppeteer'; export const route: Route = { path: '/blog', @@ -16,7 +17,23 @@ export const route: Route = { async function handler() { const baseUrl = 'https://ai.meta.com'; - const response = await ofetch(`${baseUrl}/blog/`); + const browser = await puppeteer(); + const page = await browser.newPage(); + await page.setRequestInterception(true); + + page.on('request', (request) => { + request.resourceType() === 'document' ? request.continue() : request.abort(); + }); + + const link = `${baseUrl}/blog/`; + logger.http(`Requesting ${link}`); + await page.goto(link, { + waitUntil: 'domcontentloaded', + }); + + const response = await page.content(); + page.close(); + const $ = load(response); const items = $('div._ams_') @@ -28,6 +45,8 @@ async function handler() { pubDate: parseDate($(item).find('p._amt4').text()) })); + browser.close(); + return { title: 'AI at Meta Blog', link: 'https://ai.meta.com/blog/', From 09829524068d23ebd9ded80aea20bc95ae5c247e Mon Sep 17 00:00:00 2001 From: canonnizq Date: Sat, 2 Nov 2024 20:30:33 +0800 Subject: [PATCH 15/15] try longer load time & fix lint errors --- lib/routes/meta/blog.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/routes/meta/blog.ts b/lib/routes/meta/blog.ts index 439de9902bbe97..188a81fe2c4e9d 100644 --- a/lib/routes/meta/blog.ts +++ b/lib/routes/meta/blog.ts @@ -3,6 +3,7 @@ import { parseDate } from '@/utils/parse-date'; import { load } from 'cheerio'; import logger from '@/utils/logger'; import puppeteer from '@/utils/puppeteer'; +import cache from '@/utils/cache'; export const route: Route = { path: '/blog', @@ -20,7 +21,7 @@ async function handler() { const browser = await puppeteer(); const page = await browser.newPage(); await page.setRequestInterception(true); - + page.on('request', (request) => { request.resourceType() === 'document' ? request.continue() : request.abort(); }); @@ -28,14 +29,13 @@ async function handler() { const link = `${baseUrl}/blog/`; logger.http(`Requesting ${link}`); await page.goto(link, { - waitUntil: 'domcontentloaded', + waitUntil: 'load', }); const response = await page.content(); page.close(); const $ = load(response); - const items = $('div._ams_') .toArray().map((item) => ({ category: $(item).find('p._amt0').text(),