sotanishy's competitive programming library

sotanishy's code snippets for competitive programming

View the Project on GitHub sotanishy/cp-library-cpp

:heavy_check_mark: Longest Common Substring
(string/longest_common_substring.hpp)

Depends on

Verified with

Code

#pragma once
#include <algorithm>
#include <string>

#include "lcp_array.hpp"
#include "suffix_array.hpp"

/**
 * @brief Longest Common Substring
 */
std::tuple<int, int, int, int> longest_common_substring(const std::string& s,
                                                        const std::string& t) {
    const int n = s.size();
    auto st = s + "$" + t;
    auto sa = suffix_array(st);
    auto lcp = lcp_array(st, sa);
    std::pair<int, std::tuple<int, int, int, int>> ans(0, {0, 0, 0, 0});
    for (int i = 0; i < (int)st.size() - 1; ++i) {
        int len = lcp[i];
        if (len == 0) continue;
        if (sa[i] < n && sa[i + 1] >= n + 1) {
            int a = sa[i], c = sa[i + 1] - n - 1;
            ans = std::max(ans, {len, {a, a + len, c, c + len}});
        } else if (sa[i] >= n + 1 && sa[i + 1] < n) {
            int a = sa[i + 1], c = sa[i] - n - 1;
            ans = std::max(ans, {len, {a, a + len, c, c + len}});
        }
    }
    return ans.second;
}
#line 2 "string/longest_common_substring.hpp"
#include <algorithm>
#include <string>

#line 3 "string/lcp_array.hpp"
#include <vector>

template <typename T>
std::vector<int> lcp_array(const std::vector<T>& s,
                           const std::vector<int>& sa) {
    const int n = s.size();
    std::vector<int> rank(n);
    for (int i = 0; i < n; ++i) rank[sa[i]] = i;
    int h = 0;
    std::vector<int> lcp(n - 1);
    for (int i = 0; i < n; ++i) {
        if (h > 0) --h;
        if (rank[i] == 0) continue;
        int j = sa[rank[i] - 1];
        while (j + h < n && i + h < n && s[j + h] == s[i + h]) ++h;
        lcp[rank[i] - 1] = h;
    }
    return lcp;
}

std::vector<int> lcp_array(const std::string& s, const std::vector<int>& sa) {
    return lcp_array(std::vector<char>(s.begin(), s.end()), sa);
}
#line 3 "string/suffix_array.hpp"
#include <numeric>
#line 6 "string/suffix_array.hpp"

template <typename T>
std::vector<int> suffix_array(const std::vector<T>& s) {
    const int n = s.size();
    std::vector<int> sa(n);
    std::iota(sa.begin(), sa.end(), 0);
    std::ranges::sort(sa, {}, [&](int i) { return s[i]; });
    int cl = 0;
    std::vector<int> rank(n);
    for (int i = 1; i < n; ++i) {
        if (s[sa[i - 1]] != s[sa[i]]) ++cl;
        rank[sa[i]] = cl;
    }
    std::vector<int> tmp(n), nrank(n), cnt(n);
    for (int k = 1; k < n; k <<= 1) {
        // sort by second half
        int cnt1 = 0, cnt2 = k;
        for (int i = 0; i < n; ++i) {
            int j = sa[i] - k;
            if (j >= 0)
                tmp[cnt2++] = j;
            else
                tmp[cnt1++] = j + n;
        }

        // sort by first half
        std::ranges::fill(cnt, 0);
        for (int i = 0; i < n; ++i) ++cnt[rank[tmp[i]]];
        for (int i = 1; i < n; ++i) cnt[i] += cnt[i - 1];
        for (int i = n - 1; i >= 0; --i) sa[--cnt[rank[tmp[i]]]] = tmp[i];

        // assign new rank
        nrank[sa[0]] = 0;
        cl = 0;
        for (int i = 1; i < n; ++i) {
            if (rank[sa[i - 1]] != rank[sa[i]] ||
                (sa[i - 1] + k < n ? rank[sa[i - 1] + k] : -1) !=
                    (sa[i] + k < n ? rank[sa[i] + k] : -1)) {
                ++cl;
            }
            nrank[sa[i]] = cl;
        }
        rank.swap(nrank);
    }
    return sa;
}

std::vector<int> suffix_array(const std::string& s) {
    return suffix_array(std::vector<char>(s.begin(), s.end()));
}

template <typename T>
std::vector<int> cyclic_suffix_array(const std::vector<T>& s) {
    const int n = s.size();
    std::vector<int> sa(n);
    std::iota(sa.begin(), sa.end(), 0);
    std::ranges::sort(sa, {}, [&](int i) { return s[i]; });
    int cl = 0;
    std::vector<int> rank(n);
    for (int i = 1; i < n; ++i) {
        if (s[sa[i - 1]] != s[sa[i]]) ++cl;
        rank[sa[i]] = cl;
    }
    std::vector<int> tmp(n), nrank(n), cnt(n);
    for (int k = 1; k < n; k <<= 1) {
        // sort by second half
        int cnt1 = 0, cnt2 = k;
        for (int i = 0; i < n; ++i) {
            int j = sa[i] - k;
            if (j >= 0)
                tmp[cnt2++] = j;
            else
                tmp[cnt1++] = j + n;
        }

        // sort by first half
        std::ranges::fill(cnt, 0);
        for (int i = 0; i < n; ++i) ++cnt[rank[tmp[i]]];
        for (int i = 1; i < n; ++i) cnt[i] += cnt[i - 1];
        for (int i = n - 1; i >= 0; --i) sa[--cnt[rank[tmp[i]]]] = tmp[i];

        // assign new rank
        nrank[sa[0]] = 0;
        cl = 0;
        for (int i = 1; i < n; ++i) {
            if (rank[sa[i - 1]] != rank[sa[i]] ||
                rank[(sa[i - 1] + k) % n] != rank[(sa[i] + k) % n]) {
                ++cl;
            }
            nrank[sa[i]] = cl;
        }
        rank.swap(nrank);
    }
    return sa;
}

std::vector<int> cyclic_suffix_array(const std::string& s) {
    return cyclic_suffix_array(std::vector<char>(s.begin(), s.end()));
}

/*
// comparator for substrings
// used for string matching with the suffix array

auto cmp = [&](int si, const string& t) {
    int sn = S.size(), tn = t.size();
    int ti = 0;
    for (; si < sn && ti < tn; ++si, ++ti) {
        if (T[si] < t[ti]) return true;
        if (T[si] > t[ti]) return false;
    }
    return si == sn && ti < tn;
};
*/
#line 7 "string/longest_common_substring.hpp"

/**
 * @brief Longest Common Substring
 */
std::tuple<int, int, int, int> longest_common_substring(const std::string& s,
                                                        const std::string& t) {
    const int n = s.size();
    auto st = s + "$" + t;
    auto sa = suffix_array(st);
    auto lcp = lcp_array(st, sa);
    std::pair<int, std::tuple<int, int, int, int>> ans(0, {0, 0, 0, 0});
    for (int i = 0; i < (int)st.size() - 1; ++i) {
        int len = lcp[i];
        if (len == 0) continue;
        if (sa[i] < n && sa[i + 1] >= n + 1) {
            int a = sa[i], c = sa[i + 1] - n - 1;
            ans = std::max(ans, {len, {a, a + len, c, c + len}});
        } else if (sa[i] >= n + 1 && sa[i + 1] < n) {
            int a = sa[i + 1], c = sa[i] - n - 1;
            ans = std::max(ans, {len, {a, a + len, c, c + len}});
        }
    }
    return ans.second;
}
Back to top page