I have a database with a few million sha256 hashes of files. I frequently get millions of new files which I have to check against the database to avoid duplicates.
It takes years to check a hash of a file against the mysql db. I already splitted the hashes into 16 tables (0 to F). I already tried couchbase, but this needs more than 8GB of my RAM and aborted the import with a few millions hashes left cause of to much RAM usage...
Can anyone give me a solution to store about 4,5GB of hashes (size calucalted when hashes are dumped to a plain text file) in a databse which is faster than MySQL?
The Hashes are stored without any meta information, no filename or path or id or whatelse.
kind regards, 3vilc00kie
Edit Table Definition:
-- phpMyAdmin SQL Dump
-- version 3.3.9
-- http://www.phpmyadmin.net
--
-- Host: 127.0.0.1
-- Erstellungszeit: 31. Januar 2014 um 13:55
-- Server Version: 5.5.8
-- PHP-Version: 5.3.5
SET SQL_MODE="NO_AUTO_VALUE_ON_ZERO";
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
--
-- Datenbank: `filehashes`
--
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `0`
--
CREATE TABLE IF NOT EXISTS `0` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `1`
--
CREATE TABLE IF NOT EXISTS `1` (
`sha256` binary(32) NOT NULL,
UNIQUE KEY `sha256` (`sha256`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `2`
--
CREATE TABLE IF NOT EXISTS `2` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `3`
--
CREATE TABLE IF NOT EXISTS `3` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `4`
--
CREATE TABLE IF NOT EXISTS `4` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `5`
--
CREATE TABLE IF NOT EXISTS `5` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `6`
--
CREATE TABLE IF NOT EXISTS `6` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `7`
--
CREATE TABLE IF NOT EXISTS `7` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `8`
--
CREATE TABLE IF NOT EXISTS `8` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `9`
--
CREATE TABLE IF NOT EXISTS `9` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `a`
--
CREATE TABLE IF NOT EXISTS `a` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `b`
--
CREATE TABLE IF NOT EXISTS `b` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `c`
--
CREATE TABLE IF NOT EXISTS `c` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `d`
--
CREATE TABLE IF NOT EXISTS `d` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `e`
--
CREATE TABLE IF NOT EXISTS `e` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-- --------------------------------------------------------
--
-- Tabellenstruktur für Tabelle `f`
--
CREATE TABLE IF NOT EXISTS `f` (
`sha256` binary(32) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;