Skip to content

Commit

Permalink
Update to reflect changes in spec
Browse files Browse the repository at this point in the history
  • Loading branch information
laserpants committed Sep 23, 2023
1 parent 41639ab commit d47ba99
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 213 deletions.
88 changes: 29 additions & 59 deletions include/sqids/sqids.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
///

///
/// @file sqids.hpp
/// @link https://github.com/sqids/sqids-cpp
/// @file sqids.hpp
/// @link https://github.com/sqids/sqids-cpp
/// @author Heikki Johannes Hildén
///
#pragma once
Expand Down Expand Up @@ -66,7 +66,7 @@ struct SqidsOptions
///
/// The minimum allowed length of IDs.
///
size_t minLength = 0;
uint8_t minLength = 0;

///
/// A list of words that must never appear in IDs.
Expand Down Expand Up @@ -111,9 +111,9 @@ class Sqids
std::string encode(const std::vector<T>& numbers) const;
std::vector<T> decode(const std::string& id) const;

private:
static constexpr T maxValue = std::numeric_limits<T>::max();

private:
struct Encoder
{
Encoder(const Sqids<T>* _sqids, const std::vector<T>& _numbers);
Expand Down Expand Up @@ -213,11 +213,6 @@ Sqids<T>::Sqids(const SqidsOptions& options)
throw std::runtime_error("Alphabet must not contain duplicate characters.");
}

// Minimum length cannot be greater than the alphabet size
if (options.minLength > alphabetSize) {
throw std::runtime_error("Minimum length cannot be greater than the alphabet size.");
}

const std::string lowercaseAlphabet(lowercaseString(options.alphabet));

// Clean up blocklist
Expand Down Expand Up @@ -351,7 +346,7 @@ typename std::vector<T> Sqids<T>::decode(const std::string& id) const
numbers.push_back(toNumber(chunks[0], alphabet.substr(1)));

// If this ID has multiple numbers, shuffle the alphabet, just as
// the encoding function did
// the encoding function does
if (chunks.size() > 1) {
shuffle(alphabet);
}
Expand Down Expand Up @@ -448,102 +443,77 @@ bool Sqids<T>::isBlockedId(const std::string& id) const
}

///
/// Constructs an encoder by initializing its `sqids` and `numbers` members
/// from the given arguments; the constructor body itself does no work.
///
/// NOTE(review): two signature lines appear in this listing (one taking
/// `const Numbers&`, one taking `const std::vector<T>&`) -- confirm which
/// one is current.
///
template<typename T>
Sqids<T>::Encoder::Encoder(const Sqids<T>* _sqids, const Numbers& _numbers)
Sqids<T>::Encoder::Encoder(const Sqids<T>* _sqids, const std::vector<T>& _numbers)
: sqids(_sqids),
numbers(_numbers)
{
}

template<typename T>
std::string Sqids<T>::Encoder::run(bool partitioned)
std::string Sqids<T>::Encoder::run(unsigned int increment)
{
// Get a semi-random offset from input numbers
const size_t alphabetSize = sqids->_alphabet.size();

if (increment > alphabetSize) {
throw std::runtime_error("Reached max attempts to re-generate the ID.");
}

// Get a semi-random offset from input numbers
auto a = numbers.size();

for (unsigned int i = 0; i < numbers.size(); i++) {
const T v = numbers[i];
a += i + sqids->_alphabet[v % alphabetSize];
}

const auto offset = a % alphabetSize;
const auto offset = (a + increment) % alphabetSize;

// Re-arrange alphabet so that second-half goes in front of the first-half
std::string alphabet(sqids->_alphabet.substr(offset) + sqids->_alphabet.substr(0, offset));

// `prefix` is the first character in the generated ID, used for randomization
const auto prefix = alphabet[0];

// `partition` is the character used instead of the first separator to
// indicate that the first number in the input array is a throwaway
// number. This character is used only once to handle blocklist and/or
// padding. it's omitted completely in all other cases
const auto partition = alphabet[1];

// The alphabet should not contain the `prefix` or the `partition` character
alphabet.erase(0, 2);
// Reverse alphabet
std::reverse(alphabet.begin(), alphabet.end());

// The final ID will always have the `prefix` character at the beginning
std::string id = { prefix };

// Encode the input array
for (auto it = numbers.cbegin(); it != numbers.cend(); ++it) {
// The last character of the alphabet is going to be reserved for the `separator`
const auto alphabetWithoutSeparator = alphabet.substr(0, alphabet.size() - 1);

// The first character of the alphabet is going to be reserved for the `separator`
const auto alphabetWithoutSeparator = alphabet.substr(1);

id += sqids->toId(*it, alphabetWithoutSeparator);

// If not the last number
if (std::next(it) != numbers.cend()) {
// `separator` character is used to isolate numbers within the ID
const auto separator = alphabet[alphabet.size() - 1];

// For the barrier, use the `separator` unless this is the first
// iteration and the first number is a throwaway number -- then use
// the `partition` character
if (partitioned && it == numbers.cbegin()) {
id.push_back(partition);
} else {
id.push_back(separator);
}
id.push_back(alphabet[0]);

// Shuffle on every iteration
sqids->shuffle(alphabet);
}
}

// If `minLength` is used and the ID is too short, add a throwaway number
// Handle `minLength` requirement, if the ID is too short
if (sqids->_minLength > id.size()) {
// Partitioning is required so we can safely throw away chunk of the ID
// during decoding
if (!partitioned) {
numbers.insert(numbers.begin(), 0);
id = run(true);
}
// Append a separator
id.push_back(alphabet[0]);

// If adding a `partition` number did not make the length meet the
// `minLength` requirement, then make the new id this format:
// `prefix` character + a slice of the alphabet to make up the missing
// length + the rest of the ID without the `prefix` character
if (sqids->_minLength > id.size()) {
id = id[0] + alphabet.substr(0, sqids->_minLength - id.size()) + id.substr(1);
// For decoding: two separators next to each other are what tell us that
// the rest of the ID is junk characters
while (sqids->_minLength - id.size() > 0) {
sqids->shuffle(alphabet);
id += alphabet.substr(0, std::min(sqids->_minLength - id.size(), alphabet.size()));
}
}

// if ID has a blocked word anywhere, add a throwaway number and start over
// If the ID contains a blocked word anywhere, restart with a +1 increment
if (sqids->isBlockedId(id)) {
if (partitioned) {
if (numbers[0] + 1 > sqids->maxValue) {
throw std::runtime_error("Ran out of range checking against the blocklist.");
} else {
numbers[0] += 1;
}
} else {
numbers.insert(numbers.begin(), 0);
}

id = run(true);
return run(increment + 1);
}

return id;
Expand Down
3 changes: 1 addition & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ target_link_libraries(GTest::GTest INTERFACE gtest_main)

# Declares the `sqids_tests` executable and the test sources it is built from.
# NOTE(review): two variants of the source list appear in this listing (with
# and without uniquesTests.cpp) -- confirm which one is current.
add_executable(
sqids_tests
alphabetTests.cpp blocklistTests.cpp encodingTests.cpp minLengthTests.cpp
uniquesTests.cpp)
alphabetTests.cpp blocklistTests.cpp encodingTests.cpp minLengthTests.cpp)

target_link_libraries(sqids_tests PRIVATE GTest::GTest sqids)

Expand Down
4 changes: 2 additions & 2 deletions tests/alphabetTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ TEST(AlphabetTest, Simple) {
sqidscxx::Sqids<> sqids({ alphabet: "0123456789abcdef" });

const auto numbers(sqids.numbers({ 1, 2, 3 }));
const auto id = "4d9fd2";
const auto id = "489158";

EXPECT_EQ(sqids.encode(numbers), id);
EXPECT_EQ(sqids.decode(id), numbers);
}

TEST(AlphabetTest, ShortAlphabet) {
sqidscxx::Sqids<> sqids({ alphabet: "abcde" });
sqidscxx::Sqids<> sqids({ alphabet: "abc" });

const auto numbers(sqids.numbers({ 1, 2, 3 }));

Expand Down
69 changes: 43 additions & 26 deletions tests/blocklistTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,70 +4,87 @@
// Uses a default-constructed Sqids: a word can still be decoded to its
// number, but encoding that number is expected to produce a different ID.
// NOTE(review): old and new expectation pairs both appear in this listing;
// confirm which pair is current.
TEST(Blocklist, IfNoCustomBlocklistParamUseTheDefaultBlocklist) {
sqidscxx::Sqids<> sqids;

EXPECT_EQ(sqids.decode("sexy"), sqids.numbers({ 200044 }));
EXPECT_EQ(sqids.encode({ 200044 }), "d171vI");
EXPECT_EQ(sqids.decode("aho1e"), sqids.numbers({ 4572721 }));
EXPECT_EQ(sqids.encode({ 4572721 }), "JExTR");
}

// Passes an explicitly empty blocklist: the same word is expected to
// round-trip unchanged through decode and encode.
// NOTE(review): old and new expectation pairs both appear in this listing;
// confirm which pair is current.
TEST(Blocklist, IfAnEmptyBlocklistParamPassedDontUseAnyBlocklist) {
sqidscxx::Sqids<> sqids({ blocklist: {} });

EXPECT_EQ(sqids.decode("sexy"), sqids.numbers({ 200044 }));
EXPECT_EQ(sqids.encode({ 200044 }), "sexy");
EXPECT_EQ(sqids.decode("aho1e"), sqids.numbers({ 4572721 }));
EXPECT_EQ(sqids.encode({ 4572721 }), "aho1e");
}

// Passes a custom one-word blocklist. Words outside it are expected to
// round-trip unchanged, while the listed word still decodes to its number
// but encoding that number is expected to return a different ID.
// NOTE(review): old and new blocklist entries and expectations both appear
// in this listing; confirm which set is current.
TEST(Blocklist, IfANonEmptyBlocklistParamPassedUseOnlyThat) {
sqidscxx::Sqids<> sqids({ blocklist: {
"AvTg" // originally encoded [100000]
"ArUO" // originally encoded [100000]
} });

// Make sure we don't use the default blocklist
EXPECT_EQ(sqids.decode("sexy"), sqids.numbers({ 200044 }));
EXPECT_EQ(sqids.encode({ 200044 }), "sexy");
EXPECT_EQ(sqids.decode("aho1e"), sqids.numbers({ 4572721 }));
EXPECT_EQ(sqids.encode({ 4572721 }), "aho1e");

// Make sure we are using the passed blocklist
EXPECT_EQ(sqids.decode("AvTg"), sqids.numbers({ 100000 }));
EXPECT_EQ(sqids.encode({ 100000 }), "7T1X8k");
EXPECT_EQ(sqids.decode("7T1X8k"), sqids.numbers({ 100000 }));
EXPECT_EQ(sqids.decode("ArUO"), sqids.numbers({ 100000 }));
EXPECT_EQ(sqids.encode({ 100000 }), "QyG4");
EXPECT_EQ(sqids.decode("QyG4"), sqids.numbers({ 100000 }));
}

// Blocklists the IDs (or a substring, postfix or prefix of them) that the
// inline comments say successive encoding attempts would produce, then
// checks the ID that encode returns and that it decodes back to the input.
// NOTE(review): old and new blocklist entries and expectations both appear
// in this listing; confirm which set is current.
TEST(Blocklist, Blocklist) {
sqidscxx::Sqids<> sqids({ blocklist: {
"8QRLaD", // Normal result of 1st encoding -- let's block that word on purpose
"7T1cd0dL", // Result of 2nd encoding
"UeIe", // Result of 3rd encoding is `RA8UeIe7` -- let's block a substring
"imhw", // Result of 4th encoding is `WM3Limhw` -- let's block the postfix
"LfUQ" // Result of 4th encoding is `LfUQh4HN` -- let's block the prefix
"JSwXFaosAN", // Normal result of 1st encoding. Let's block that word on purpose
"OCjV9JK64o", // Result of 2nd encoding
"rBHf", // Result of 3rd encoding is `4rBHfOiqd3`. Let's block a substring
"79SM", // Result of 4th encoding is `dyhgw479SM`. Let's block the postfix
"7tE6" // Result of 4th encoding is `7tE6jdAHLe`. Let's block the prefix
} });

EXPECT_EQ(sqids.encode({ 1, 2, 3}), "TM0x1Mxz");
EXPECT_EQ(sqids.decode("TM0x1Mxz"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.encode({ 1'000'000, 2'000'000 }), "1aYeB7bRUt");
EXPECT_EQ(sqids.decode("1aYeB7bRUt"), sqids.numbers({ 1'000'000, 2'000'000 }));
}

// Decoding is expected to work even for IDs that are on the blocklist:
// every listed ID must decode to { 1, 2, 3 }.
// NOTE(review): old and new blocklists and expectations both appear in this
// listing; confirm which set is current.
TEST(Blocklist, DecodingBlocklistWordsShouldStillWork) {
sqidscxx::Sqids<> sqids({ blocklist: { "8QRLaD", "7T1cd0dL", "RA8UeIe7", "WM3Limhw", "LfUQh4HN" } });
sqidscxx::Sqids<> sqids({ blocklist: { "86Rf07", "se8ojk", "ARsz1p", "Q8AI49", "5sQRZO" } });

EXPECT_EQ(sqids.decode("8QRLaD"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.decode("7T1cd0dL"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.decode("RA8UeIe7"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.decode("WM3Limhw"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.decode("LfUQh4HN"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.decode("86Rf07"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.decode("se8ojk"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.decode("ARsz1p"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.decode("Q8AI49"), sqids.numbers({ 1, 2, 3 }));
EXPECT_EQ(sqids.decode("5sQRZO"), sqids.numbers({ 1, 2, 3 }));
}

// With a single three-character word on the blocklist, encoding 1000 and
// decoding the result is expected to give back 1000.
// NOTE(review): two constructor lines (old and new blocklist word) appear in
// this listing; confirm which one is current.
TEST(Blocklist, MatchAgainstAShortBlocklistWord) {
sqidscxx::Sqids<> sqids({ blocklist: { "pPQ" } });
sqidscxx::Sqids<> sqids({ blocklist: { "pnd" } });

EXPECT_EQ(sqids.decode(sqids.encode({ 1000 })), sqids.numbers({ 1000 }));
}

// Supplies a lowercase blocklist word together with an uppercase-only
// alphabet, then checks the exact ID produced for { 1, 2, 3 } and that it
// decodes back to the same numbers.
// NOTE(review): old and new blocklist words and expected IDs both appear in
// this listing; confirm which set is current.
TEST(Blocklist, BlocklistFilteringInConstructor) {
sqidscxx::Sqids<> sqids({
alphabet: "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
blocklist: { "sqnmpn" } // Lowercase blocklist in uppercase-only alphabet
blocklist: { "sxnzkl" } // Lowercase blocklist in uppercase-only alphabet
});

auto id = sqids.encode({ 1, 2, 3 });
auto numbers = sqids.decode(id);

EXPECT_EQ(id, "ULPBZGBM"); // Without blocklist, would've been "SQNMPN"
EXPECT_EQ(id, "IBSHOZ"); // Without blocklist, would've been "SXNZKL"
EXPECT_EQ(numbers, sqids.numbers({ 1, 2, 3 }));
}

// Builds a Sqids instance whose alphabet size, minimum length and blocklist
// size are all equal (3), with three 3-letter words over that alphabet on the
// blocklist, and expects encode({ 0 }) to throw std::runtime_error.
TEST(Blocklist, MaxEncodingAttempts) {
    const std::string alphabet = "abc";

    // The updated header declares `SqidsOptions::minLength` as uint8_t.
    // Passing a non-const size_t through the braced initializer below would
    // be a narrowing conversion, which list-initialization forbids, so the
    // local uses the field's own type and is a compile-time constant.
    const uint8_t minLength = 3;

    const std::set<std::string> blocklist = { "cab", "abc", "bca" };

    sqidscxx::Sqids<> sqids({
        alphabet: alphabet,
        minLength: minLength,
        blocklist: blocklist
    });

    // Sanity-check the premise of the test: all three sizes coincide.
    EXPECT_EQ(alphabet.size(), minLength);
    EXPECT_EQ(blocklist.size(), minLength);

    ASSERT_THROW(sqids.encode({ 0 }), std::runtime_error);
}
Loading

0 comments on commit d47ba99

Please sign in to comment.