Copied over the templates from blog/ and set up permalinks. Drew some bespoke star and retweet icons. Wrote a script to import data from the archive.
264 lines
8 KiB
JavaScript
Executable file
264 lines
8 KiB
JavaScript
Executable file
#!/usr/bin/env node
|
|
|
|
"use strict";
|
|
|
|
const fsPromises = require("node:fs/promises");
|
|
const path = require("node:path");
|
|
|
|
/**
 * Error describing a failure tied to one specific tweet.
 *
 * The message is prefixed with the tweet's `id` in square brackets, and the
 * tweet itself is kept on the error so handlers can inspect it.
 */
class TweetProcessingError extends Error {
    /** The tweet that was being handled when the failure occurred. */
    tweet;

    /**
     * @param {{id: unknown}} tweet - Object whose `id` is used as the message prefix.
     * @param {string} message - Human-readable description of the failure.
     * @param {{cause?: unknown}} [options] - Optional standard `Error` options
     *     (for example the underlying `cause`).
     */
    constructor(tweet, message, options) {
        super(`[${tweet.id}]: ${message}`, options);

        // Without this the error prints and reports as a plain "Error".
        this.name = "TweetProcessingError";
        this.tweet = tweet;
    }
}
|
|
|
|
/**
 * A single media entity of a tweet, together with the file names derived
 * from its `media_url`.
 */
class MediaEntity {
    /** Owning tweet; only its `id` is read by this class. */
    tweet;
    /** Raw media entity; only its `media_url` is read by this class. */
    entity;

    /** Base name of the media URL's path. Set by `process()`. */
    fileName;
    /** Location of the media file below the media directory. Set by `process()`. */
    mediaFilePath;

    /**
     * @param {{id: unknown}} tweet - Tweet the entity belongs to.
     * @param {{media_url: string}} entity - Raw media entity.
     */
    constructor(tweet, entity) {
        this.tweet = tweet;
        this.entity = entity;
    }

    /**
     * Derives `fileName` and `mediaFilePath` from the entity's media URL.
     *
     * The source file is expected at `<mediaPath>/<tweet id>-<fileName>`.
     *
     * @param {string} mediaPath - Directory holding the media files.
     */
    process(mediaPath) {
        const { pathname } = new URL(this.entity.media_url);
        const baseName = path.basename(pathname);

        this.fileName = baseName;
        this.mediaFilePath = path.join(mediaPath, `${this.tweet.id}-${baseName}`);
    }

    /**
     * Copies the media file into `contentDirectory` under its bare file name.
     * `process()` must have been called first.
     *
     * @param {string} contentDirectory - Destination directory.
     * @returns {Promise<void>}
     * @throws Re-throws whatever `copyFile` rejects with, after logging it.
     */
    async copyMediaFile(contentDirectory) {
        const target = path.join(contentDirectory, this.fileName);
        const source = this.mediaFilePath;
        console.log(`[${this.tweet.id}]: Copying media file from ${source} to ${target}`);

        try {
            await fsPromises.copyFile(source, target);
        } catch (copyError) {
            console.error(copyError);
            throw copyError;
        }
    }
}
|
|
|
|
/**
 * Wraps one entry of the tweet list and renders it as a Markdown content file
 * with JSON front matter.
 *
 * The wrapped object keeps the tweet's own fields (`id_str`, `created_at`,
 * `full_text`, `entities`) under its `tweet` property.
 */
class Tweet {
    /** Raw entry as loaded from the tweet list. */
    tweet;

    /** MediaEntity instances found by the most recent `process()` call. */
    mediaEntities = [];

    /**
     * @param {{tweet: object}} tweet - Raw entry; see the class description.
     */
    constructor(tweet) {
        this.tweet = tweet;
    }

    /** @returns {string} The tweet's `id_str`. */
    get id() {
        return this.#innerTweet.id_str;
    }

    /** @returns {Date} The tweet's `created_at` parsed as a Date. */
    get dateCreated() {
        return new Date(this.#innerTweet.created_at);
    }

    /** @returns {string} The tweet's `full_text`. */
    get text() {
        return this.#innerTweet.full_text;
    }

    /**
     * Relative location of this tweet below the content root:
     * `<yyyy>/<mm>/<id>` (directory form) or `<yyyy>/<mm>/<id>.md` (file form).
     * Year and month are taken from the local-time view of `dateCreated`.
     *
     * @param {boolean} [asDirectory=true] - Return the directory form when true.
     * @returns {string}
     */
    contentPath(asDirectory = true) {
        const timestamp = this.dateCreated;
        const year = timestamp.getFullYear().toString().padStart(4, "0");
        // getMonth() is zero-based (0-11).
        const month = (timestamp.getMonth() + 1).toString().padStart(2, "0");
        const idString = this.id;

        return path.join(year, month, asDirectory ? idString : `${idString}.md`);
    }

    get #innerTweet() { return this.tweet.tweet; }

    /**
     * Renders the tweet and writes it below `hugoContentPath`.
     *
     * Hashtags and user mentions become Markdown reference links, and media
     * entities become `figures/image` shortcodes. A tweet with media is written
     * to `<contentPath>/index.md` with its media files copied next to it; any
     * other tweet is written to `<contentPath>.md`.
     *
     * @param {string} hugoContentPath - Root directory the content is written to.
     * @param {string} mediaPath - Directory holding the source media files.
     * @returns {Promise<Tweet>} This instance, once everything has been written.
     * @throws Re-throws file system errors from copying media or writing the file.
     */
    async process(hugoContentPath, mediaPath) {
        const indexObject = {
            date: this.dateCreated.toISOString(),
            title: this.text,
            slug: this.id,
            tweet: this.tweet,
        };

        // Start from a clean slate so a repeated call does not duplicate media.
        this.mediaEntities = [];

        let textOfTweet = this.text;
        const references = [];

        const doTextSubstitution = (startIndex, endIndex, replacementText) => {
            const textBefore = textOfTweet.substring(0, startIndex);
            const textAfter = textOfTweet.substring(endIndex);
            textOfTweet = `${textBefore}${replacementText}${textAfter}`;
        };

        const processTextEntitySubstitution = (typeOfItem, item, i) => {
            const [startIndex, endIndex] = item.indices.map(Number);
            const reference = `[${textOfTweet.substring(startIndex, endIndex)}][entity${i}]`;
            doTextSubstitution(startIndex, endIndex, reference);

            // Hashtags have no `screen_name`; they link to the hashtag page
            // built from their `text` instead of to a user profile.
            const target = typeOfItem === "hashtags"
                ? `https://twitter.com/hashtag/${encodeURIComponent(item.text)}`
                : `https://twitter.com/${item.screen_name}`;
            references.push(`[entity${i}]: ${target}`);
        };

        const processMediaEntitySubstitution = (item) => {
            const entity = new MediaEntity(this, item);
            this.mediaEntities.push(entity);
            entity.process(mediaPath);

            const [startIndex, endIndex] = item.indices.map(Number);
            const reference = `\n\n{{< figures/image name="${entity.fileName}" >}}\n\n`;
            doTextSubstitution(startIndex, endIndex, reference);
        };

        // Entities are applied from the end of the text towards the start so
        // that the indices of entities not yet processed stay valid.
        const orderedEntities = Object.entries(this.#innerTweet.entities ?? {})
            .flatMap(([entityType, entities]) => entities.map((entity) => [entityType, entity]))
            .sort(([, a], [, b]) => {
                // NOTE(review): indices may arrive as strings in the source
                // data - confirm. Coercing keeps the ordering numeric; as
                // strings "4" would sort after "12".
                const startIndexOfA = Number(a.indices[0]);
                const startIndexOfB = Number(b.indices[0]);

                console.assert(
                    startIndexOfA !== startIndexOfB,
                    `[${this.id}]: Two entities start at index ${startIndexOfA}.`
                );

                // Reverse sort by start index of the entity.
                return startIndexOfB - startIndexOfA;
            });

        orderedEntities.forEach(([typeOfItem, item], i) => {
            switch (typeOfItem) {
            case "hashtags":
            case "user_mentions":
                processTextEntitySubstitution(typeOfItem, item, i);
                break;
            case "media":
                processMediaEntitySubstitution(item);
                break;
            case "symbols":
                // Symbols appear to be stock ticker symbols. They may be other things.
                console.log(`[${this.id}]: Encountered symbol entity. Ignoring.`, item);
                break;
            default:
                // Any other entity type is left untouched in the text.
                break;
            }
        });

        const frontMatter = JSON.stringify(indexObject, null, 4);
        // References were collected back to front; restore reading order.
        const joinedReferences = references.reverse().join("\n");
        const contentsOfIndexFile = `${frontMatter}\n\n${textOfTweet}\n\n${joinedReferences}`;

        // A tweet with media gets its own directory so the media files can sit
        // next to index.md; a tweet without media is a single .md file.
        const hasMedia = this.mediaEntities.length > 0;
        let containingDirectory;
        let tweetFilePath;
        if (hasMedia) {
            containingDirectory = path.join(hugoContentPath, this.contentPath());
            tweetFilePath = path.join(containingDirectory, "index.md");
        } else {
            tweetFilePath = path.join(hugoContentPath, this.contentPath(false));
            containingDirectory = path.dirname(tweetFilePath);
        }

        console.log(`[${this.id}]: Writing tweet file to ${tweetFilePath}`);
        await fsPromises.mkdir(containingDirectory, { recursive: true });

        for (const mediaEntity of this.mediaEntities) {
            await mediaEntity.copyMediaFile(containingDirectory);
        }

        try {
            await fsPromises.writeFile(tweetFilePath, contentsOfIndexFile);
        } catch (error) {
            console.error(error);
            throw error;
        }

        return this;
    }
}
|
|
|
|
/**
 * Lazily yields at most `length` items from the start of `iterable`.
 *
 * Stops early when the iterable runs out, so a short input yields only the
 * items it actually has instead of being padded with `undefined`. The
 * underlying iterator is not closed once the limit is reached.
 *
 * @param {Iterable<T>} iterable - Source of items.
 * @param {number} length - Maximum number of items to yield.
 * @yields {T}
 * @template T
 */
function* take(iterable, length) {
    const iterator = iterable[Symbol.iterator]();

    for (let remaining = length; remaining > 0; remaining--) {
        const { done, value } = iterator.next();
        if (done) {
            return;
        }

        yield value;
    }
}
|
|
|
|
/**
 * Loads the tweet list at `tweetsJSONPath` and lazily yields one promise per
 * entry, each resolving to a `Tweet` wrapping that entry.
 *
 * The file is loaded with `require`, so the path is made absolute first.
 *
 * @param {string} tweetsJSONPath - Path to the tweet list; may be relative.
 * @yields {Promise<Tweet>}
 */
function* readTweets(tweetsJSONPath) {
    const absolutePath = path.resolve(tweetsJSONPath);
    console.log("Loading tweets from", absolutePath);

    const archiveEntries = require(absolutePath);
    for (const entry of archiveEntries) {
        // An immediately-invoked async function yields an already-settled promise.
        yield (async () => new Tweet(entry))();
    }
}
|
|
|
|
/**
 * Command-line entry point.
 *
 * Expects two program arguments: the Twitter archive directory and the
 * content directory to write into. It verifies that the archive, its `data`
 * subdirectory and the content directory are accessible, then processes
 * tweets from `data/tweets.json` concurrently. A tweet that fails is logged
 * and does not stop the others.
 *
 * @returns {Promise<number>} 0 once processing has finished, -1 when the
 *     arguments are invalid or a required directory is not accessible.
 */
async function main() {
    const programArguments = process.argv;

    // argv[0] is `node` and argv[1] is the script, so two user arguments make four.
    if (programArguments.length !== 4) {
        console.error("Invalid number of program arguments. Expected 2: <twitter-archive-path> <hugo-content-path>.");
        return -1;
    }

    const [, , twitterArchivePath, hugoContentPath] = programArguments;

    try {
        await fsPromises.access(twitterArchivePath);
    } catch (error) {
        console.error(error);
        return -1;
    }

    const twitterArchiveDataPath = path.join(twitterArchivePath, "data");
    try {
        await fsPromises.access(twitterArchiveDataPath);
    } catch (error) {
        console.error(`${twitterArchivePath} doesn't appear to be a valid Twitter archive. It's missing a data directory!`);
        console.error(error);
        return -1;
    }

    try {
        await fsPromises.access(hugoContentPath);
    } catch (error) {
        console.error(error);
        return -1;
    }

    const tweetsFilePath = path.join(twitterArchiveDataPath, "tweets.json");
    const tweetsMediaPath = path.join(twitterArchiveDataPath, "tweets_media");

    // Only this many tweets from the start of the list are processed per run.
    const maximumNumberOfTweets = 1000;
    const pendingTweets = [...take(readTweets(tweetsFilePath), maximumNumberOfTweets)];

    // Each tweet reports its own outcome, so the summary counts only tweets
    // that were actually written rather than every tweet that was attempted.
    const outcomes = await Promise.all(
        pendingTweets.map(async (tweetPromise) => {
            try {
                const tweet = await tweetPromise;
                await tweet.process(hugoContentPath, tweetsMediaPath);
                return true;
            } catch (error) {
                console.error(error);
                return false;
            }
        }));

    const numberOfTweetsProcessed = outcomes.filter(Boolean).length;
    const numberOfTweetsFailed = outcomes.length - numberOfTweetsProcessed;

    console.log(`\nSuccessfully processed ${numberOfTweetsProcessed} tweets.`);
    if (numberOfTweetsFailed > 0) {
        console.error(`Failed to process ${numberOfTweetsFailed} tweets.`);
    }

    return 0;
}
|
|
|
|
// Run the importer. main() resolves to the exit status (0 or -1); hand it to
// the process so callers such as shell scripts can detect failure, and report
// any unexpected rejection instead of leaving the promise floating.
main().then(
    (exitCode) => {
        process.exitCode = exitCode;
    },
    (error) => {
        console.error(error);
        process.exitCode = 1;
    },
);
|