import type { Order } from '../constants';
import { CacheTTL } from '../constants';
import type { SearchesApi } from '@gen/wklr-backend-api/v1/api';
import type {
  RequestSearchQuery,
  SearchResult,
  SearchResultOrderByEnum,
  SearchResultOrderEnum,
} from '@gen/wklr-backend-api/v1/model';
import type { PartialSearchQuery } from '../types/SearchQuery';
/**
 * Matches anything shaped like an HTML tag.
 * `[\s\S]` is used instead of `.` so that a tag containing a line break is matched as well.
 */
const HTML_TAG_PATTERN = /<[\s\S]+?>/g;

/** Matches a run of six or more identical characters (`.` does not match line terminators). */
const REPEATED_CHAR_PATTERN = /(.)\1{5,}/g;

/** Matches control characters U+0000–U+001F other than `\n`, plus U+FFFD REPLACEMENT CHARACTER. */
// eslint-disable-next-line no-control-regex
const UNWANTED_CHAR_PATTERN = /[\x00-\x09\x0b-\x1F\uFFFD]/g;

/**
 * Cleans up a search-hit snippet.
 *
 * Steps, applied in this order:
 * 1. Remove every tag-shaped substring except the exact strings `<em>` and `</em>`.
 * 2. Collapse runs of six or more identical characters down to two
 *    (such runs are assumed to be noise from PDF text extraction).
 * 3. Remove control characters (except `\n`) and U+FFFD.
 *
 * NOTE: this is a crude tag stripper, not a full HTML sanitizer; for example a `<` that is never
 * followed by a `>` is left untouched.
 */
function sanitizeSnippet(snippet: string): string {
  return snippet
    .replace(HTML_TAG_PATTERN, (tag) => (tag === '<em>' || tag === '</em>' ? tag : ''))
    .replace(REPEATED_CHAR_PATTERN, '$1$1')
    .replace(UNWANTED_CHAR_PATTERN, '');
}

/** Repository wrapping the generated `SearchesApi` client. */
export class SearchesRepository {
  constructor(private readonly api: SearchesApi) {}

  /**
   * Calls the API's `getCompleteSearch` for `q`, requesting caching with the default TTL.
   *
   * @param q - Text forwarded to the API.
   * @returns The `data` payload of the response.
   */
  async getCompleteSearch(q: string): Promise<string[]> {
    const res = await this.api.getCompleteSearch(q, {
      cache: { ttl: CacheTTL.DEFAULT },
    });
    return res.data;
  }

  /**
   * Runs a document search and returns the result with every hit snippet sanitized.
   *
   * @param query - Search query; sent to the API as a JSON string.
   * @param perPage - Number of results requested per page.
   * @param page - Page index; the offset sent to the API is `page * perPage`
   *   (so presumably zero-based — confirm with callers).
   * @param orderValue - Optional ordering. Its `orderBy` / `order` are forwarded only when it
   *   contains an `orderBy` key; otherwise both are sent as `undefined`.
   * @returns The response `data` with each `entries[].hits[].snippet` replaced by its sanitized form.
   */
  async searchDocuments(
    query: PartialSearchQuery,
    perPage: number,
    page: number,
    orderValue?: Order['value'],
  ): Promise<SearchResult> {
    const q = JSON.stringify(query);
    const searchFrom = page * perPage;
    const { orderBy, order }: { orderBy?: SearchResultOrderByEnum; order?: SearchResultOrderEnum } =
      orderValue && 'orderBy' in orderValue ? orderValue : { orderBy: undefined, order: undefined };

    // The generated `searchDocuments` does not turn `q` (an object type in the openapi-generator
    // definition) into a properly URL-encoded string, so the JSON string is passed cast as
    // `RequestSearchQuery` instead.
    // ref: https://github.com/OpenAPITools/openapi-generator/issues/7564
    const results = await this.api.searchDocuments(q as RequestSearchQuery, searchFrom, perPage, orderBy, order, {
      cache: { ttl: CacheTTL.DEFAULT },
    });

    // Sanitization: rebuild entries/hits immutably, replacing only the snippet text.
    const entries = results.data.entries.map((entry) => ({
      ...entry,
      hits: entry.hits.map((hit) => ({ ...hit, snippet: sanitizeSnippet(hit.snippet) })),
    }));

    return { ...results.data, entries };
  }
}
