#!/usr/bin/env node
"use strict";

const fsPromises = require("node:fs/promises");
const path = require("node:path");

/** Upper bound on the number of tweets converted in a single run. */
const MAX_TWEETS_PER_RUN = 1000;

/**
 * Error tied to a specific tweet; the message is prefixed with the tweet ID.
 */
class TweetProcessingError extends Error {
    tweet;

    /**
     * @param {Tweet} tweet - The tweet being processed when the error occurred.
     * @param {string} message - Human-readable description of the failure.
     */
    constructor(tweet, message) {
        super(`[${tweet.id}]: ${message}`);
        this.name = "TweetProcessingError";
        this.tweet = tweet;
    }
}

/**
 * A media attachment of a tweet, resolved to a file in the archive's media
 * directory.
 */
class MediaEntity {
    tweet;
    entity;
    fileName;
    mediaFilePath;

    /**
     * @param {Tweet} tweet - The owning tweet (only its `id` is read).
     * @param {object} entity - Raw media entity; `media_url` is read by `process`.
     */
    constructor(tweet, entity) {
        this.tweet = tweet;
        this.entity = entity;
    }

    /**
     * Derives `fileName` from the last path segment of `media_url` and
     * `mediaFilePath` as `<mediaPath>/<tweet id>-<fileName>`.
     *
     * @param {string} mediaPath - Directory holding the archived media files.
     * @throws {TypeError} If `media_url` is not a valid absolute URL.
     */
    process(mediaPath) {
        const url = new URL(this.entity.media_url);
        this.fileName = path.basename(url.pathname);
        this.mediaFilePath = path.join(mediaPath, `${this.tweet.id}-${this.fileName}`);
    }

    /**
     * Copies the archived media file into `contentDirectory` under `fileName`.
     * `process` must have been called first.
     *
     * @param {string} contentDirectory - Existing destination directory.
     * @returns {Promise<void>}
     * @throws Rethrows any error from `fs.promises.copyFile` after logging it.
     */
    async copyMediaFile(contentDirectory) {
        const destinationPath = path.join(contentDirectory, this.fileName);
        console.log(`[${this.tweet.id}]: Copying media file from ${this.mediaFilePath} to ${destinationPath}`);

        try {
            await fsPromises.copyFile(this.mediaFilePath, destinationPath);
        } catch (error) {
            console.error(error);
            throw error;
        }
    }
}

/**
 * Wrapper around one record of the tweets JSON file. The record is expected
 * to have the shape `{ tweet: { id_str, created_at, full_text, entities } }`.
 */
class Tweet {
    tweet;
    mediaEntities = [];

    /**
     * @param {object} tweet - Raw record from the tweets JSON file.
     */
    constructor(tweet) {
        this.tweet = tweet;
    }

    /** @returns {string} The tweet's `id_str`. */
    get id() {
        return this.#innerTweet.id_str;
    }

    /** @returns {Date} `created_at` parsed with the `Date` constructor. */
    get dateCreated() {
        return new Date(this.#innerTweet.created_at);
    }

    /** @returns {string} The tweet's `full_text`. */
    get text() {
        return this.#innerTweet.full_text;
    }

    /**
     * Relative content path of the form `YYYY/MM/<id>` (directory form) or
     * `YYYY/MM/<id>.md` (file form), using the local-time year and month of
     * `dateCreated`.
     *
     * @param {boolean} [asDirectory=true] - Whether to return the directory form.
     * @returns {string}
     */
    contentPath(asDirectory = true) {
        const idString = this.#innerTweet.id_str;
        const timestamp = this.dateCreated;

        return path.join(
            timestamp.getFullYear().toString().padStart(4, "0"),
            // Months are 0-11.
            (timestamp.getMonth() + 1).toString().padStart(2, "0"),
            asDirectory ? idString : `${idString}.md`
        );
    }

    get #innerTweet() {
        return this.tweet.tweet;
    }

    /**
     * Flattens the tweet's entities into `[entityType, entity]` pairs ordered
     * by descending start index, so substitutions can be applied back to front
     * without invalidating the offsets of entities that come earlier.
     *
     * @returns {Array<[string, object]>}
     */
    #entitiesInReverseOrder() {
        // Coerce to numbers so the comparison is numeric even when the
        // archive stores indices as strings.
        const startOf = (entity) => Number(entity.indices[0]);

        return Object.entries(this.#innerTweet.entities ?? {})
            .flatMap(([entityType, entities]) => entities.map((entity) => [entityType, entity]))
            .sort(([, a], [, b]) => {
                const startIndexOfA = startOf(a);
                const startIndexOfB = startOf(b);

                if (startIndexOfA === startIndexOfB) {
                    // Two entities starting at the same offset is unexpected.
                    console.assert(false);
                    return 0;
                }

                // Reverse sort by start index of the entity.
                return startIndexOfA < startIndexOfB ? 1 : -1;
            });
    }

    /**
     * Builds the Markdown body: hashtags and user mentions become reference
     * links, media entities become `figures/image` shortcodes (and are recorded
     * in `mediaEntities`). Other entity types are left untouched.
     *
     * @param {string} mediaPath - Directory holding the archived media files.
     * @returns {{ body: string, references: string[] }} The substituted text and
     *   the reference-link definitions in order of appearance in the text.
     */
    #renderBody(mediaPath) {
        // Twitter documents entity indices as Unicode code point offsets, not
        // UTF-16 code units, so work on an array of code points. A replacement
        // occupies a single slot; that only shifts offsets after it, which have
        // already been handled because entities are processed back to front.
        const codePoints = Array.from(this.text);
        const references = [];

        const substitute = (item, replacementFor) => {
            const [startIndex, endIndex] = item.indices.map(Number);
            const originalText = codePoints.slice(startIndex, endIndex).join("");
            codePoints.splice(startIndex, endIndex - startIndex, replacementFor(originalText));
        };

        this.#entitiesInReverseOrder().forEach(([typeOfItem, item], i) => {
            switch (typeOfItem) {
                case "hashtags":
                    // Hashtag entities have no `screen_name`; link via `text`.
                    substitute(item, (label) => `[${label}][entity${i}]`);
                    references.push(`[entity${i}]: https://twitter.com/hashtag/${encodeURIComponent(item.text)}`);
                    break;

                case "user_mentions":
                    substitute(item, (label) => `[${label}][entity${i}]`);
                    references.push(`[entity${i}]: https://twitter.com/${item.screen_name}`);
                    break;

                case "media": {
                    const entity = new MediaEntity(this, item);
                    this.mediaEntities.push(entity);
                    entity.process(mediaPath);
                    substitute(item, () => `\n\n{{< figures/image name="${entity.fileName}" >}}\n\n`);
                    break;
                }

                case "symbols":
                    // Symbols appear to be stock ticker symbols. They may be other things.
                    console.log(`[${this.id}]: Encountered symbol entity. Ignoring.`, item);
                    break;
            }
        });

        // Entities were visited back to front; flip to order of appearance.
        return { body: codePoints.join(""), references: references.reverse() };
    }

    /**
     * Writes the tweet as a Hugo content file with JSON front matter.
     *
     * Tweets with media become a directory `<hugoContentPath>/YYYY/MM/<id>/`
     * containing `index.md` plus the copied media files; tweets without media
     * become a single file `<hugoContentPath>/YYYY/MM/<id>.md`.
     *
     * @param {string} hugoContentPath - Root of the Hugo content directory.
     * @param {string} mediaPath - Directory holding the archived media files.
     * @returns {Promise<Tweet>} This tweet, once all files are written.
     * @throws Rethrows filesystem errors from mkdir/copyFile/writeFile.
     */
    async process(hugoContentPath, mediaPath) {
        // Start clean so calling process() twice does not duplicate media.
        this.mediaEntities = [];

        const indexObject = {
            date: this.dateCreated.toISOString(),
            title: this.text,
            slug: this.id,
            tweet: this.tweet,
        };

        const { body, references } = this.#renderBody(mediaPath);
        const frontMatter = JSON.stringify(indexObject, null, 4);
        const contentsOfIndexFile = `${frontMatter}\n\n${body}\n\n${references.join("\n")}`;

        const hasMedia = this.mediaEntities.length > 0;
        const tweetFilePath = hasMedia
            ? path.join(hugoContentPath, this.contentPath(), "index.md")
            : path.join(hugoContentPath, this.contentPath(false));
        const containingDirectory = path.dirname(tweetFilePath);

        console.log(`[${this.id}]: Writing tweet file to ${tweetFilePath}`);
        await fsPromises.mkdir(containingDirectory, { recursive: true });

        for (const mediaEntity of this.mediaEntities) {
            await mediaEntity.copyMediaFile(containingDirectory);
        }

        try {
            await fsPromises.writeFile(tweetFilePath, contentsOfIndexFile);
        } catch (error) {
            console.error(error);
            throw error;
        }

        return this;
    }
}

/**
 * Yields at most `length` items from `iterable`, stopping early when the
 * iterable is exhausted.
 *
 * @param {Iterable<T>} iterable - Source of items.
 * @param {number} length - Maximum number of items to yield.
 * @returns {Generator<T>}
 * @template T
 */
function* take(iterable, length) {
    const iterator = iterable[Symbol.iterator]();

    for (let remaining = length; remaining > 0; remaining--) {
        const { done, value } = iterator.next();
        if (done) {
            return;
        }
        yield value;
    }
}

/**
 * Loads the tweets JSON file and yields one already-resolved promise of a
 * `Tweet` per record.
 *
 * @param {string} tweetsJSONPath - Path to the JSON file (resolved against cwd).
 * @returns {Generator<Promise<Tweet>>}
 * @throws If the file cannot be loaded or parsed by `require`.
 */
function* readTweets(tweetsJSONPath) {
    const resolvedPath = path.resolve(tweetsJSONPath);
    console.log("Loading tweets from", resolvedPath);

    const tweets = require(resolvedPath);
    for (const tweetObject of tweets) {
        yield Promise.resolve(new Tweet(tweetObject));
    }
}

/**
 * Entry point: validates the command-line arguments and paths, then converts
 * up to `MAX_TWEETS_PER_RUN` tweets from `<archive>/data/tweets.json` into
 * Hugo content, reading media from `<archive>/data/tweets_media`.
 *
 * A failure of an individual tweet is logged and does not abort the run.
 *
 * @returns {Promise<number>} `0` when the run completed, `-1` when the
 *   arguments or paths were invalid.
 */
async function main() {
    const programArguments = process.argv;

    // The first two arguments are always `node` and then the script.
    if (programArguments.length !== 4) {
        console.error("Invalid number of program arguments. Expected 2: <twitter-archive-path> <hugo-content-path>.");
        return -1;
    }

    const [, , twitterArchivePath, hugoContentPath] = programArguments;

    try {
        await fsPromises.access(twitterArchivePath);
    } catch (error) {
        console.error(error);
        return -1;
    }

    const twitterArchiveDataPath = path.join(twitterArchivePath, "data");
    try {
        await fsPromises.access(twitterArchiveDataPath);
    } catch (error) {
        console.error(`${twitterArchivePath} doesn't appear to be a valid Twitter archive. It's missing a data directory!`);
        console.error(error);
        return -1;
    }

    try {
        await fsPromises.access(hugoContentPath);
    } catch (error) {
        console.error(error);
        return -1;
    }

    const tweetsFilePath = path.join(twitterArchiveDataPath, "tweets.json");
    const tweetsMediaPath = path.join(twitterArchiveDataPath, "tweets_media");

    const tweetPromises = [...take(readTweets(tweetsFilePath), MAX_TWEETS_PER_RUN)];
    const outcomes = await Promise.all(
        tweetPromises.map(async (tweetPromise) => {
            try {
                const tweet = await tweetPromise;
                await tweet.process(hugoContentPath, tweetsMediaPath);
                return true;
            } catch (error) {
                // One bad tweet must not abort the whole run.
                console.error(error);
                return false;
            }
        })
    );

    // Count only tweets whose files were actually written.
    const numberOfTweetsProcessed = outcomes.filter(Boolean).length;
    const numberOfTweetsFailed = outcomes.length - numberOfTweetsProcessed;

    console.log(`\nSuccessfully processed ${numberOfTweetsProcessed} tweets.`);
    if (numberOfTweetsFailed > 0) {
        console.error(`Failed to process ${numberOfTweetsFailed} tweets.`);
    }

    return 0;
}

// Surface main()'s result as the process exit status and make sure an
// unexpected rejection (e.g. an unreadable tweets.json) is reported.
main().then(
    (exitCode) => {
        process.exitCode = exitCode === 0 ? 0 : 1;
    },
    (error) => {
        console.error(error);
        process.exitCode = 1;
    }
);