postgresqlimdb
Differences
This shows you the differences between two versions of the page.
Next revision | Previous revision | ||
postgresqlimdb [2024/03/08 10:15] – created z0hpvk | postgresqlimdb [2025/03/08 22:24] (current) – external edit 127.0.0.1 | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== IMDB Database ====== | ====== IMDB Database ====== | ||
- | The import of the data was done using the script here: [[https:// | + | The import of the data was based on the script here: [[https:// |
+ | Run the below script as the postgres user to avoid permission issues.\\ | ||
+ | <code BASH> | ||
+ | printf " | ||
+ | rm -rf imdb-datasets/ | ||
+ | mkdir imdb-datasets/ | ||
+ | |||
+ | printf " | ||
+ | cd imdb-datasets | ||
+ | curl -O https:// | ||
+ | curl -O https:// | ||
+ | curl -O https:// | ||
+ | curl -O https:// | ||
+ | curl -O https:// | ||
+ | curl -O https:// | ||
+ | curl -O https:// | ||
+ | |||
+ | printf " | ||
+ | gzip -dk *.gz | ||
+ | cd .. | ||
+ | |||
+ | printf " | ||
+ | psql -d ' | ||
+ | psql -d ' | ||
+ | |||
+ | printf " | ||
+ | psql -d imdb -c " | ||
+ | psql -d imdb -c " | ||
+ | psql -d imdb -c " | ||
+ | psql -d imdb -c " | ||
+ | psql -d imdb -c " | ||
+ | psql -d imdb -c " | ||
+ | psql -d imdb -c " | ||
+ | |||
+ | printf " | ||
+ | psql -d imdb -c "COPY title_ratings FROM ' | ||
+ | psql -d imdb -c "COPY name_basics FROM ' | ||
+ | psql -d imdb -c "COPY title_akas FROM ' | ||
+ | psql -d imdb -c "COPY title_basics FROM ' | ||
+ | psql -d imdb -c "COPY title_crew FROM ' | ||
+ | psql -d imdb -c "COPY title_episode FROM ' | ||
+ | psql -d imdb -c "COPY title_principals FROM ' | ||
+ | </ | ||
+ | |||
+ | Next run the below SQL script to | ||
+ | * Create Primary Keys | ||
+ | * Fix Data Issues | ||
+ | * Create Foreign Keys | ||
+ | * Convert Column Lists to Arrays | ||
+ | |||
+ | <code SQL> | ||
+ | ALTER TABLE title_ratings ADD CONSTRAINT title_ratings_pk PRIMARY KEY (tconst); | ||
+ | ALTER TABLE title_akas ADD CONSTRAINT title_akas_pk PRIMARY KEY (titleid, ordering); | ||
+ | ALTER TABLE title_basics ADD CONSTRAINT title_basics_pk PRIMARY KEY (tconst); | ||
+ | ALTER TABLE title_crew ADD CONSTRAINT title_crew_pk PRIMARY KEY (tconst); | ||
+ | ALTER TABLE title_episode ADD CONSTRAINT title_episode_pk PRIMARY KEY (const); | ||
+ | ALTER TABLE title_principals ADD CONSTRAINT title_principals_pk PRIMARY KEY (tconst, ordering); | ||
+ | ALTER TABLE name_basics ADD CONSTRAINT name_basics_pk PRIMARY KEY (nconst); | ||
+ | |||
+ | INSERT INTO name_basics (nconst, | ||
+ | (' | ||
+ | |||
+ | ALTER TABLE title_ratings ADD CONSTRAINT title_ratings_fk FOREIGN KEY (tconst) REFERENCES title_basics (tconst); | ||
+ | ALTER TABLE title_episode ADD CONSTRAINT title_episode_fk FOREIGN KEY (parenttconst) REFERENCES title_basics (tconst); | ||
+ | ALTER TABLE title_principals ADD CONSTRAINT title_principals_fk FOREIGN KEY (nconst) REFERENCES name_basics (nconst); | ||
+ | |||
+ | ALTER TABLE name_basics ALTER primaryprofession TYPE text[] USING string_to_array(primaryprofession, | ||
+ | ALTER TABLE name_basics ALTER knownfortitles TYPE text[] USING string_to_array(knownfortitles, | ||
+ | ALTER TABLE title_basics ALTER genres TYPE text[] USING string_to_array(genres, | ||
+ | ALTER TABLE title_crew ALTER directors TYPE text[] USING string_to_array(directors, | ||
+ | ALTER TABLE title_crew ALTER writers TYPE text[] USING string_to_array(writers, | ||
+ | </ |
postgresqlimdb.1709892920.txt.gz · Last modified: 2025/03/08 22:23 (external edit)