'use client'
import { useTranslation } from 'react-i18next'
import { formatFileSize, formatNumber, formatTime } from '@/utils/format'
import type { DocType } from '@/models/datasets'
import useTimestamp from '@/hooks/use-timestamp'

/** Input control kinds a metadata sub-field may declare. */
export type inputType = 'input' | 'select' | 'textarea'

/** Keys of the metadata map: every `DocType` plus the two extra groups defined in this file. */
export type metadataType = DocType | 'originInfo' | 'technicalParameters'

/** Description of a single sub-field inside a metadata group. */
type SubFieldConfig = {
  /** Translated label of the sub-field. */
  label: string
  /** Declared input kind; not set for most fields (how consumers default it is not visible here). */
  inputType?: inputType
  field?: string
  /**
   * Optional custom renderer for the raw value.
   * `total` is only read by the `hit_count` renderer in this file.
   */
  render?: (value: any, total?: number) => React.ReactNode | string
}

/** Description of one metadata group (a document type, origin info or technical parameters). */
type MetadataGroup = {
  /** Translated group title (empty string for `originInfo`). */
  text: string
  /** Explicitly `false` on the groups this file marks as not editable. */
  allowEdit?: boolean
  icon?: React.ReactNode
  iconName?: string
  /** Sub-fields keyed by the metadata key / property path they describe. */
  subFieldsMap: Record<string, SubFieldConfig>
}

type MetadataMap = Record<metadataType, MetadataGroup>

const fieldPrefix = 'datasetDocuments.metadata.field'

/**
 * Builds the translated description of every metadata group and its sub-fields.
 *
 * Labels are resolved through `useTranslation`, so this must be called as a React hook.
 *
 * @returns A record keyed by `metadataType` holding the group title, optional icon name,
 *          edit flag and the map of sub-fields (label, optional input type, optional renderer).
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation()
  const { formatTime: formatTimestamp } = useTimestamp()

  // Shared by `created_at` and `completed_at`. The format string is looked up on every
  // render call, exactly as the two original inline renderers did.
  const renderTimestamp: NonNullable<SubFieldConfig['render']> = value =>
    formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string)

  return {
    book: {
      text: t('datasetDocuments.metadata.type.book'),
      iconName: 'bookOpen',
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.book.title`) },
        language: {
          label: t(`${fieldPrefix}.book.language`),
          inputType: 'select',
        },
        author: { label: t(`${fieldPrefix}.book.author`) },
        publisher: { label: t(`${fieldPrefix}.book.publisher`) },
        publication_date: { label: t(`${fieldPrefix}.book.publicationDate`) },
        isbn: { label: t(`${fieldPrefix}.book.ISBN`) },
        category: {
          label: t(`${fieldPrefix}.book.category`),
          inputType: 'select',
        },
      },
    },
    web_page: {
      text: t('datasetDocuments.metadata.type.webPage'),
      iconName: 'globe',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.webPage.title`) },
        'url': { label: t(`${fieldPrefix}.webPage.url`) },
        'language': {
          label: t(`${fieldPrefix}.webPage.language`),
          inputType: 'select',
        },
        'author/publisher': { label: t(`${fieldPrefix}.webPage.authorPublisher`) },
        'publish_date': { label: t(`${fieldPrefix}.webPage.publishDate`) },
        'topics/keywords': { label: t(`${fieldPrefix}.webPage.topicsKeywords`) },
        'description': { label: t(`${fieldPrefix}.webPage.description`) },
      },
    },
    paper: {
      text: t('datasetDocuments.metadata.type.paper'),
      iconName: 'graduationHat',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.paper.title`) },
        'language': {
          label: t(`${fieldPrefix}.paper.language`),
          inputType: 'select',
        },
        'author': { label: t(`${fieldPrefix}.paper.author`) },
        'publish_date': { label: t(`${fieldPrefix}.paper.publishDate`) },
        'journal/conference_name': {
          label: t(`${fieldPrefix}.paper.journalConferenceName`),
        },
        'volume/issue/page_numbers': { label: t(`${fieldPrefix}.paper.volumeIssuePage`) },
        'doi': { label: t(`${fieldPrefix}.paper.DOI`) },
        'topics/keywords': { label: t(`${fieldPrefix}.paper.topicsKeywords`) },
        'abstract': {
          label: t(`${fieldPrefix}.paper.abstract`),
          inputType: 'textarea',
        },
      },
    },
    social_media_post: {
      text: t('datasetDocuments.metadata.type.socialMediaPost'),
      iconName: 'atSign',
      subFieldsMap: {
        'platform': { label: t(`${fieldPrefix}.socialMediaPost.platform`) },
        'author/username': {
          label: t(`${fieldPrefix}.socialMediaPost.authorUsername`),
        },
        'publish_date': { label: t(`${fieldPrefix}.socialMediaPost.publishDate`) },
        'post_url': { label: t(`${fieldPrefix}.socialMediaPost.postURL`) },
        'topics/tags': { label: t(`${fieldPrefix}.socialMediaPost.topicsTags`) },
      },
    },
    personal_document: {
      text: t('datasetDocuments.metadata.type.personalDocument'),
      iconName: 'file',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.personalDocument.title`) },
        'author': { label: t(`${fieldPrefix}.personalDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.personalDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.personalDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.personalDocument.documentType`),
          inputType: 'select',
        },
        'tags/category': {
          label: t(`${fieldPrefix}.personalDocument.tagsCategory`),
        },
      },
    },
    business_document: {
      text: t('datasetDocuments.metadata.type.businessDocument'),
      iconName: 'briefcase',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.businessDocument.title`) },
        'author': { label: t(`${fieldPrefix}.businessDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.businessDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.businessDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.businessDocument.documentType`),
          inputType: 'select',
        },
        'department/team': {
          label: t(`${fieldPrefix}.businessDocument.departmentTeam`),
        },
      },
    },
    im_chat_log: {
      text: t('datasetDocuments.metadata.type.IMChat'),
      iconName: 'messageTextCircle',
      subFieldsMap: {
        'chat_platform': { label: t(`${fieldPrefix}.IMChat.chatPlatform`) },
        'chat_participants/group_name': {
          label: t(`${fieldPrefix}.IMChat.chatPartiesGroupName`),
        },
        'start_date': { label: t(`${fieldPrefix}.IMChat.startDate`) },
        'end_date': { label: t(`${fieldPrefix}.IMChat.endDate`) },
        'participants': { label: t(`${fieldPrefix}.IMChat.participants`) },
        'topicsKeywords': {
          label: t(`${fieldPrefix}.IMChat.topicsKeywords`),
          inputType: 'textarea',
        },
        'fileType': { label: t(`${fieldPrefix}.IMChat.fileType`) },
      },
    },
    wikipedia_entry: {
      text: t('datasetDocuments.metadata.type.wikipediaEntry'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.wikipediaEntry.title`) },
        'language': {
          label: t(`${fieldPrefix}.wikipediaEntry.language`),
          inputType: 'select',
        },
        'web_page_url': { label: t(`${fieldPrefix}.wikipediaEntry.webpageURL`) },
        'editor/contributor': {
          label: t(`${fieldPrefix}.wikipediaEntry.editorContributor`),
        },
        'last_edit_date': {
          label: t(`${fieldPrefix}.wikipediaEntry.lastEditDate`),
        },
        'summary/introduction': {
          label: t(`${fieldPrefix}.wikipediaEntry.summaryIntroduction`),
          inputType: 'textarea',
        },
      },
    },
    synced_from_notion: {
      text: t('datasetDocuments.metadata.type.notion'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.notion.title`) },
        'language': { label: t(`${fieldPrefix}.notion.lang`), inputType: 'select' },
        'author/creator': { label: t(`${fieldPrefix}.notion.author`) },
        'creation_date': { label: t(`${fieldPrefix}.notion.createdTime`) },
        'last_modified_date': {
          label: t(`${fieldPrefix}.notion.lastModifiedTime`),
        },
        'notion_page_link': { label: t(`${fieldPrefix}.notion.url`) },
        'category/tags': { label: t(`${fieldPrefix}.notion.tag`) },
        'description': { label: t(`${fieldPrefix}.notion.desc`) },
      },
    },
    synced_from_github: {
      text: t('datasetDocuments.metadata.type.github'),
      allowEdit: false,
      subFieldsMap: {
        'repository_name': { label: t(`${fieldPrefix}.github.repoName`) },
        'repository_description': { label: t(`${fieldPrefix}.github.repoDesc`) },
        'repository_owner/organization': { label: t(`${fieldPrefix}.github.repoOwner`) },
        'code_filename': { label: t(`${fieldPrefix}.github.fileName`) },
        'code_file_path': { label: t(`${fieldPrefix}.github.filePath`) },
        'programming_language': { label: t(`${fieldPrefix}.github.programmingLang`) },
        'github_link': { label: t(`${fieldPrefix}.github.url`) },
        'open_source_license': { label: t(`${fieldPrefix}.github.license`) },
        'commit_date': { label: t(`${fieldPrefix}.github.lastCommitTime`) },
        'commit_author': {
          label: t(`${fieldPrefix}.github.lastCommitAuthor`),
        },
      },
    },
    originInfo: {
      text: '',
      allowEdit: false,
      subFieldsMap: {
        'name': { label: t(`${fieldPrefix}.originInfo.originalFilename`) },
        'data_source_info.upload_file.size': {
          label: t(`${fieldPrefix}.originInfo.originalFileSize`),
          render: value => formatFileSize(value),
        },
        'created_at': {
          label: t(`${fieldPrefix}.originInfo.uploadDate`),
          render: renderTimestamp,
        },
        'completed_at': {
          label: t(`${fieldPrefix}.originInfo.lastUpdateDate`),
          render: renderTimestamp,
        },
        'data_source_type': {
          label: t(`${fieldPrefix}.originInfo.source`),
          render: value => t(`datasetDocuments.metadata.source.${value}`),
        },
      },
    },
    technicalParameters: {
      text: t('datasetDocuments.metadata.type.technicalParameters'),
      allowEdit: false,
      subFieldsMap: {
        'dataset_process_rule.mode': {
          label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
          // Any mode other than 'automatic' is labelled as custom.
          render: value => value === 'automatic'
            ? (t('datasetDocuments.embedding.automatic') as string)
            : (t('datasetDocuments.embedding.custom') as string),
        },
        'dataset_process_rule.rules.segmentation.max_tokens': {
          label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
          render: value => formatNumber(value),
        },
        'average_segment_length': {
          label: t(`${fieldPrefix}.technicalParameters.avgParagraphLength`),
          render: value => `${formatNumber(value)} characters`,
        },
        'segment_count': {
          label: t(`${fieldPrefix}.technicalParameters.paragraphs`),
          render: value => `${formatNumber(value)} paragraphs`,
        },
        'hit_count': {
          label: t(`${fieldPrefix}.technicalParameters.hitCount`),
          render: (value, total) => {
            // Falsy values (undefined, null, NaN, '') count as zero hits.
            const hits = value || 0
            // `total` is optional: default it so the label never prints "undefined".
            const totalCount = total ?? 0
            // Guard against dividing by a zero/NaN total.
            const percent = !totalCount ? 0 : ((hits / totalCount) * 100).toFixed(2)
            return `${percent}% (${hits}/${totalCount})`
          },
        },
        'indexing_latency': {
          label: t(`${fieldPrefix}.technicalParameters.embeddingTime`),
          render: value => formatTime(value),
        },
        'tokens': {
          label: t(`${fieldPrefix}.technicalParameters.embeddedSpend`),
          render: value => `${formatNumber(value)} tokens`,
        },
      },
    },
  }
}

const langPrefix = 'datasetDocuments.metadata.languageMap.'

/**
 * Returns the translated display name for each supported language code.
 *
 * @returns An object keyed by language code (`zh`, `en`, …) whose values are the
 *          translations found under `datasetDocuments.metadata.languageMap.*`.
 */
export const useLanguages = () => {
  const { t } = useTranslation()
  return {
    zh: t(`${langPrefix}zh`),
    en: t(`${langPrefix}en`),
    es: t(`${langPrefix}es`),
    fr: t(`${langPrefix}fr`),
    de: t(`${langPrefix}de`),
    ja: t(`${langPrefix}ja`),
    ko: t(`${langPrefix}ko`),
    ru: t(`${langPrefix}ru`),
    ar: t(`${langPrefix}ar`),
    pt: t(`${langPrefix}pt`),
    it: t(`${langPrefix}it`),
    nl: t(`${langPrefix}nl`),
    pl: t(`${langPrefix}pl`),
    sv: t(`${langPrefix}sv`),
    tr: t(`${langPrefix}tr`),
    he: t(`${langPrefix}he`),
    hi: t(`${langPrefix}hi`),
    da: t(`${langPrefix}da`),
    fi: t(`${langPrefix}fi`),
    no: t(`${langPrefix}no`),
    hu: t(`${langPrefix}hu`),
    el: t(`${langPrefix}el`),
    cs: t(`${langPrefix}cs`),
    th: t(`${langPrefix}th`),
    id: t(`${langPrefix}id`),
    ro: t(`${langPrefix}ro`),
  }
}

const bookCategoryPrefix = 'datasetDocuments.metadata.categoryMap.book.'

/**
 * Returns the translated label for each book category.
 *
 * @returns An object keyed by category id whose values are the translations found
 *          under `datasetDocuments.metadata.categoryMap.book.*`.
 */
export const useBookCategories = () => {
  const { t } = useTranslation()
  return {
    fiction: t(`${bookCategoryPrefix}fiction`),
    biography: t(`${bookCategoryPrefix}biography`),
    history: t(`${bookCategoryPrefix}history`),
    science: t(`${bookCategoryPrefix}science`),
    technology: t(`${bookCategoryPrefix}technology`),
    education: t(`${bookCategoryPrefix}education`),
    philosophy: t(`${bookCategoryPrefix}philosophy`),
    religion: t(`${bookCategoryPrefix}religion`),
    socialSciences: t(`${bookCategoryPrefix}socialSciences`),
    art: t(`${bookCategoryPrefix}art`),
    travel: t(`${bookCategoryPrefix}travel`),
    health: t(`${bookCategoryPrefix}health`),
    selfHelp: t(`${bookCategoryPrefix}selfHelp`),
    businessEconomics: t(`${bookCategoryPrefix}businessEconomics`),
    cooking: t(`${bookCategoryPrefix}cooking`),
    childrenYoungAdults: t(`${bookCategoryPrefix}childrenYoungAdults`),
    comicsGraphicNovels: t(`${bookCategoryPrefix}comicsGraphicNovels`),
    poetry: t(`${bookCategoryPrefix}poetry`),
    drama: t(`${bookCategoryPrefix}drama`),
    other: t(`${bookCategoryPrefix}other`),
  }
}

const personalDocCategoryPrefix
  = 'datasetDocuments.metadata.categoryMap.personalDoc.'

/**
 * Returns the translated label for each personal-document category.
 *
 * @returns An object keyed by category id whose values are the translations found
 *          under `datasetDocuments.metadata.categoryMap.personalDoc.*`.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation()
  return {
    notes: t(`${personalDocCategoryPrefix}notes`),
    blogDraft: t(`${personalDocCategoryPrefix}blogDraft`),
    diary: t(`${personalDocCategoryPrefix}diary`),
    researchReport: t(`${personalDocCategoryPrefix}researchReport`),
    bookExcerpt: t(`${personalDocCategoryPrefix}bookExcerpt`),
    schedule: t(`${personalDocCategoryPrefix}schedule`),
    list: t(`${personalDocCategoryPrefix}list`),
    projectOverview: t(`${personalDocCategoryPrefix}projectOverview`),
    photoCollection: t(`${personalDocCategoryPrefix}photoCollection`),
    creativeWriting: t(`${personalDocCategoryPrefix}creativeWriting`),
    codeSnippet: t(`${personalDocCategoryPrefix}codeSnippet`),
    designDraft: t(`${personalDocCategoryPrefix}designDraft`),
    personalResume: t(`${personalDocCategoryPrefix}personalResume`),
    other: t(`${personalDocCategoryPrefix}other`),
  }
}

const businessDocCategoryPrefix
  = 'datasetDocuments.metadata.categoryMap.businessDoc.'

/**
 * Returns the translated label for each business-document category.
 *
 * @returns An object keyed by category id whose values are the translations found
 *          under `datasetDocuments.metadata.categoryMap.businessDoc.*`.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation()
  return {
    meetingMinutes: t(`${businessDocCategoryPrefix}meetingMinutes`),
    researchReport: t(`${businessDocCategoryPrefix}researchReport`),
    proposal: t(`${businessDocCategoryPrefix}proposal`),
    employeeHandbook: t(`${businessDocCategoryPrefix}employeeHandbook`),
    trainingMaterials: t(`${businessDocCategoryPrefix}trainingMaterials`),
    requirementsDocument: t(`${businessDocCategoryPrefix}requirementsDocument`),
    designDocument: t(`${businessDocCategoryPrefix}designDocument`),
    productSpecification: t(`${businessDocCategoryPrefix}productSpecification`),
    financialReport: t(`${businessDocCategoryPrefix}financialReport`),
    marketAnalysis: t(`${businessDocCategoryPrefix}marketAnalysis`),
    projectPlan: t(`${businessDocCategoryPrefix}projectPlan`),
    teamStructure: t(`${businessDocCategoryPrefix}teamStructure`),
    policiesProcedures: t(`${businessDocCategoryPrefix}policiesProcedures`),
    contractsAgreements: t(`${businessDocCategoryPrefix}contractsAgreements`),
    emailCorrespondence: t(`${businessDocCategoryPrefix}emailCorrespondence`),
    other: t(`${businessDocCategoryPrefix}other`),
  }
}