Update to reflect changes in spec

sqids · Sep 23, 2023 · d47ba99 · d47ba99
1 parent 41639ab
commit d47ba99
Show file tree

Hide file tree

Showing 7 changed files with 166 additions and 213 deletions.
diff --git a/include/sqids/sqids.hpp b/include/sqids/sqids.hpp
@@ -29,8 +29,8 @@
 ///
 
 ///
-/// @file sqids.hpp
-/// @link https://github.com/sqids/sqids-cpp
+/// @file   sqids.hpp
+/// @link   https://github.com/sqids/sqids-cpp
 /// @author Heikki Johannes Hildén
 ///
 #pragma once
@@ -66,7 +66,7 @@ struct SqidsOptions
     ///
     /// The minimum allowed length of IDs.
     ///
-    size_t minLength = 0;
+    uint8_t minLength = 0;
 
     ///
     /// A list of words that must never appear in IDs.
@@ -111,9 +111,9 @@ class Sqids
     std::string encode(const std::vector<T>& numbers) const;
     std::vector<T> decode(const std::string& id) const;
 
-private:
     static constexpr T maxValue = std::numeric_limits<T>::max();
 
+private:
     struct Encoder
     {
         Encoder(const Sqids<T>* _sqids, const std::vector<T>& _numbers);
@@ -213,11 +213,6 @@ Sqids<T>::Sqids(const SqidsOptions& options)
         throw std::runtime_error("Alphabet must not contain duplicate characters.");
     }
 
-    // Minimum length cannot be greater than the alphabet size
-    if (options.minLength > alphabetSize) {
-        throw std::runtime_error("Minimum length cannot be greater than the alphabet size.");
-    }
-
     const std::string lowercaseAlphabet(lowercaseString(options.alphabet));
 
     // Clean up blocklist
@@ -351,7 +346,7 @@ typename std::vector<T> Sqids<T>::decode(const std::string& id) const
             numbers.push_back(toNumber(chunks[0], alphabet.substr(1)));
 
             // If this ID has multiple numbers, shuffle the alphabet, just as
-            // the encoding function did
+            // the encoding function does
             if (chunks.size() > 1) {
                 shuffle(alphabet);
             }
@@ -448,102 +443,77 @@ bool Sqids<T>::isBlockedId(const std::string& id) const
 }
 
 template<typename T>
-Sqids<T>::Encoder::Encoder(const Sqids<T>* _sqids, const Numbers& _numbers)
+Sqids<T>::Encoder::Encoder(const Sqids<T>* _sqids, const std::vector<T>& _numbers)
   : sqids(_sqids),
     numbers(_numbers)
 {
 }
 
 template<typename T>
-std::string Sqids<T>::Encoder::run(bool partitioned)
+std::string Sqids<T>::Encoder::run(unsigned int increment)
 {
-    // Get a semi-random offset from input numbers
     const size_t alphabetSize = sqids->_alphabet.size();
 
+    if (increment > alphabetSize) {
+        throw std::runtime_error("Reached max attempts to re-generate the ID.");
+    }
+
+    // Get a semi-random offset from input numbers
     auto a = numbers.size();
 
     for (unsigned int i = 0; i < numbers.size(); i++) {
         const T v = numbers[i];
         a += i + sqids->_alphabet[v % alphabetSize];
     }
 
-    const auto offset = a % alphabetSize;
+    const auto offset = (a + increment) % alphabetSize;
 
     // Re-arrange alphabet so that second-half goes in front of the first-half
     std::string alphabet(sqids->_alphabet.substr(offset) + sqids->_alphabet.substr(0, offset));
 
     // `prefix` is the first character in the generated ID, used for randomization
     const auto prefix = alphabet[0];
 
-    // `partition` is the character used instead of the first separator to
-    // indicate that the first number in the input array is a throwaway
-    // number. This character is used only once to handle blocklist and/or
-    // padding. it's omitted completely in all other cases
-    const auto partition = alphabet[1];
-
-    // The alphabet should not contain the `prefix` or the `partition` character
-    alphabet.erase(0, 2);
+    // Reverse alphabet
+    std::reverse(alphabet.begin(), alphabet.end());
 
     // The final ID will always have the `prefix` character at the beginning
     std::string id = { prefix };
 
     // Encode the input array
     for (auto it = numbers.cbegin(); it != numbers.cend(); ++it) {
-	// The last character of the alphabet is going to be reserved for the `separator`
-	const auto alphabetWithoutSeparator = alphabet.substr(0, alphabet.size() - 1);
+
+	// The first character of the alphabet is going to be reserved for the `separator`
+	const auto alphabetWithoutSeparator = alphabet.substr(1);
 
 	id += sqids->toId(*it, alphabetWithoutSeparator);
 
         // If not the last number
         if (std::next(it) != numbers.cend()) {
             // `separator` character is used to isolate numbers within the ID
-            const auto separator = alphabet[alphabet.size() - 1];
-
-	    // For the barrier, use the `separator` unless this is the first
-            // iteration and the first number is a throwaway number -- then use
-            // the `partition` character
-	    if (partitioned && it == numbers.cbegin()) {
-                id.push_back(partition);
-	    } else {
-                id.push_back(separator);
-	    }
+            id.push_back(alphabet[0]);
 
 	    // Shuffle on every iteration
 	    sqids->shuffle(alphabet);
         }
     }
 
-    // If `minLength` is used and the ID is too short, add a throwaway number
+    // Handle `minLength` requirement, if the ID is too short
     if (sqids->_minLength > id.size()) {
-        // Partitioning is required so we can safely throw away chunk of the ID
-        // during decoding
-        if (!partitioned) {
-            numbers.insert(numbers.begin(), 0);
-            id = run(true);
-        }
+        // Append a separator
+        id.push_back(alphabet[0]);
 
-        // If adding a `partition` number did not make the length meet the
-        // `minLength` requirement, then make the new id this format:
-        // `prefix` character + a slice of the alphabet to make up the missing
-        // length + the rest of the ID without the `prefix` character
-        if (sqids->_minLength > id.size()) {
-            id = id[0] + alphabet.substr(0, sqids->_minLength - id.size()) + id.substr(1);
+        // For decoding: two separators next to each other is what tells us the
+        // rest are junk characters
+        while (sqids->_minLength - id.size() > 0) {
+	    sqids->shuffle(alphabet);
+            id += alphabet.substr(0, std::min(sqids->_minLength - id.size(), alphabet.size()));
         }
     }
 
-    // if ID has a blocked word anywhere, add a throwaway number and start over
+    // if ID has a blocked word anywhere, restart with a +1 increment
     if (sqids->isBlockedId(id)) {
-        if (partitioned) {
-            if (numbers[0] + 1 > sqids->maxValue) {
-                throw std::runtime_error("Ran out of range checking against the blocklist.");
-            } else {
-                numbers[0] += 1;
-            }
-        } else {
-            numbers.insert(numbers.begin(), 0);
-        }
-
-        id = run(true);
+        return run(increment + 1);
     }
 
     return id;

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -16,8 +16,7 @@ target_link_libraries(GTest::GTest INTERFACE gtest_main)
 
 add_executable(
   sqids_tests
-  alphabetTests.cpp blocklistTests.cpp encodingTests.cpp minLengthTests.cpp 
-  uniquesTests.cpp)
+  alphabetTests.cpp blocklistTests.cpp encodingTests.cpp minLengthTests.cpp)
 
 target_link_libraries(sqids_tests PRIVATE GTest::GTest sqids)
 

diff --git a/tests/alphabetTests.cpp b/tests/alphabetTests.cpp
@@ -5,14 +5,14 @@ TEST(AlphabetTest, Simple) {
     sqidscxx::Sqids<> sqids({ alphabet: "0123456789abcdef" });
 
     const auto numbers(sqids.numbers({ 1, 2, 3 }));
-    const auto id = "4d9fd2";
+    const auto id = "489158";
 
     EXPECT_EQ(sqids.encode(numbers), id);
     EXPECT_EQ(sqids.decode(id), numbers);
 }
 
 TEST(AlphabetTest, ShortAlphabet) {
-    sqidscxx::Sqids<> sqids({ alphabet: "abcde" });
+    sqidscxx::Sqids<> sqids({ alphabet: "abc" });
 
     const auto numbers(sqids.numbers({ 1, 2, 3 }));
 

diff --git a/tests/blocklistTests.cpp b/tests/blocklistTests.cpp
@@ -4,70 +4,87 @@
 TEST(Blocklist, IfNoCustomBlocklistParamUseTheDefaultBlocklist) {
     sqidscxx::Sqids<> sqids;
 
-    EXPECT_EQ(sqids.decode("sexy"), sqids.numbers({ 200044 }));
-    EXPECT_EQ(sqids.encode({ 200044 }), "d171vI");
+    EXPECT_EQ(sqids.decode("aho1e"), sqids.numbers({ 4572721 }));
+    EXPECT_EQ(sqids.encode({ 4572721 }), "JExTR");
 }
 
 TEST(Blocklist, IfAnEmptyBlocklistParamPassedDontUseAnyBlocklist) {
     sqidscxx::Sqids<> sqids({ blocklist: {} });
 
-    EXPECT_EQ(sqids.decode("sexy"), sqids.numbers({ 200044 }));
-    EXPECT_EQ(sqids.encode({ 200044 }), "sexy");
+    EXPECT_EQ(sqids.decode("aho1e"), sqids.numbers({ 4572721 }));
+    EXPECT_EQ(sqids.encode({ 4572721 }), "aho1e");
 }
 
 TEST(Blocklist, IfANonEmptyBlocklistParamPassedUseOnlyThat) {
     sqidscxx::Sqids<> sqids({ blocklist: { 
-        "AvTg"  // originally encoded [100000]
+        "ArUO"  // originally encoded [100000]
     } });
 
     // Make sure we don't use the default blocklist
-    EXPECT_EQ(sqids.decode("sexy"), sqids.numbers({ 200044 }));
-    EXPECT_EQ(sqids.encode({ 200044 }), "sexy");
+    EXPECT_EQ(sqids.decode("aho1e"), sqids.numbers({ 4572721 }));
+    EXPECT_EQ(sqids.encode({ 4572721 }), "aho1e");
 
     // Make sure we are using the passed blocklist
-    EXPECT_EQ(sqids.decode("AvTg"), sqids.numbers({ 100000 }));
-    EXPECT_EQ(sqids.encode({ 100000 }), "7T1X8k");
-    EXPECT_EQ(sqids.decode("7T1X8k"), sqids.numbers({ 100000 }));
+    EXPECT_EQ(sqids.decode("ArUO"), sqids.numbers({ 100000 }));
+    EXPECT_EQ(sqids.encode({ 100000 }), "QyG4");
+    EXPECT_EQ(sqids.decode("QyG4"), sqids.numbers({ 100000 }));
 }
 
 TEST(Blocklist, Blocklist) {
     sqidscxx::Sqids<> sqids({ blocklist: { 
-        "8QRLaD",    // Normal result of 1st encoding -- let's block that word on purpose
-        "7T1cd0dL",  // Result of 2nd encoding
-        "UeIe",      // Result of 3rd encoding is `RA8UeIe7` -- let's block a substring
-        "imhw",      // Result of 4th encoding is `WM3Limhw` -- let's block the postfix
-        "LfUQ"       // Result of 4th encoding is `LfUQh4HN` -- let's block the prefix
+        "JSwXFaosAN",  // Normal result of 1st encoding. Let's block that word on purpose
+        "OCjV9JK64o",  // Result of 2nd encoding
+        "rBHf",        // Result of 3rd encoding is `4rBHfOiqd3`. Let's block a substring
+        "79SM",        // Result of 4th encoding is `dyhgw479SM`. Let's block the postfix
+        "7tE6"         // Result of 5th encoding is `7tE6jdAHLe`. Let's block the prefix
     } });
 
-    EXPECT_EQ(sqids.encode({ 1, 2, 3}), "TM0x1Mxz");
-    EXPECT_EQ(sqids.decode("TM0x1Mxz"), sqids.numbers({ 1, 2, 3 }));
+    EXPECT_EQ(sqids.encode({ 1'000'000, 2'000'000 }), "1aYeB7bRUt");
+    EXPECT_EQ(sqids.decode("1aYeB7bRUt"), sqids.numbers({ 1'000'000, 2'000'000 }));
 }
 
 TEST(Blocklist, DecodingBlocklistWordsShouldStillWork) {
-    sqidscxx::Sqids<> sqids({ blocklist: { "8QRLaD", "7T1cd0dL", "RA8UeIe7", "WM3Limhw", "LfUQh4HN" } });
+    sqidscxx::Sqids<> sqids({ blocklist: { "86Rf07", "se8ojk", "ARsz1p", "Q8AI49", "5sQRZO" } });
 
-    EXPECT_EQ(sqids.decode("8QRLaD"), sqids.numbers({ 1, 2, 3 }));
-    EXPECT_EQ(sqids.decode("7T1cd0dL"), sqids.numbers({ 1, 2, 3 }));
-    EXPECT_EQ(sqids.decode("RA8UeIe7"), sqids.numbers({ 1, 2, 3 }));
-    EXPECT_EQ(sqids.decode("WM3Limhw"), sqids.numbers({ 1, 2, 3 }));
-    EXPECT_EQ(sqids.decode("LfUQh4HN"), sqids.numbers({ 1, 2, 3 }));
+    EXPECT_EQ(sqids.decode("86Rf07"), sqids.numbers({ 1, 2, 3 }));
+    EXPECT_EQ(sqids.decode("se8ojk"), sqids.numbers({ 1, 2, 3 }));
+    EXPECT_EQ(sqids.decode("ARsz1p"), sqids.numbers({ 1, 2, 3 }));
+    EXPECT_EQ(sqids.decode("Q8AI49"), sqids.numbers({ 1, 2, 3 }));
+    EXPECT_EQ(sqids.decode("5sQRZO"), sqids.numbers({ 1, 2, 3 }));
 }
 
 TEST(Blocklist, MatchAgainstAShortBlocklistWord) {
-    sqidscxx::Sqids<> sqids({ blocklist: { "pPQ" } });
+    sqidscxx::Sqids<> sqids({ blocklist: { "pnd" } });
 
     EXPECT_EQ(sqids.decode(sqids.encode({ 1000 })), sqids.numbers({ 1000 }));
 }
 
 TEST(Blocklist, BlocklistFilteringInConstructor) {
     sqidscxx::Sqids<> sqids({ 
         alphabet: "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
-        blocklist: { "sqnmpn" }  // Lowercase blocklist in uppercase-only alphabet
+        blocklist: { "sxnzkl" }  // Lowercase blocklist in uppercase-only alphabet
     });
 
     auto id = sqids.encode({ 1, 2, 3 });
     auto numbers = sqids.decode(id);
 
-    EXPECT_EQ(id, "ULPBZGBM");  // Without blocklist, would've been "SQNMPN"
+    EXPECT_EQ(id, "IBSHOZ");  // Without blocklist, would've been "SXNZKL"
     EXPECT_EQ(numbers, sqids.numbers({ 1, 2, 3 }));
 }
+
+TEST(Blocklist, MaxEncodingAttempts) {  // encode() is expected to run out of re-generation attempts and throw
+    const std::string alphabet = "abc";
+    const uint8_t minLength = 3;  // Same type as SqidsOptions::minLength, so the brace-init below does not narrow
+    const std::set<std::string> blocklist = { "cab", "abc", "bca" };
+
+    sqidscxx::Sqids<> sqids({
+        alphabet: alphabet,
+        minLength: minLength,
+        blocklist: blocklist
+    });
+
+    // Sanity-check the fixture: alphabet size, blocklist size and minimum length all agree
+    EXPECT_EQ(alphabet.size(), minLength);
+    EXPECT_EQ(blocklist.size(), minLength);
+    ASSERT_THROW(sqids.encode({ 0 }), std::runtime_error);
+}